Beispiel #1
0
    def cleanup_report_table(self):
        """Flatten ``self.pre_report`` into rows suitable for Excel.

        Rows containing a ``'NO MUTATIONS'`` marker are passed through
        untouched.  Every other row has its nested mutation lists sorted
        and expanded so that each innermost record becomes one output
        row; leading cells that were already emitted on an earlier row
        are replaced by ``=""`` so Excel shows them as visually empty.
        """
        flattened = []
        for row in self.pre_report:
            # Marker rows need no expansion at all.
            if any('NO MUTATIONS' in cell for cell in row):
                flattened.append(row)
                continue

            # Highest-ranked mutations first (sort on items 1, then 0).
            row[-1] = sorted(row[-1], key=iget(1, 0), reverse=True)

            for mutation in row[-1]:
                # Innermost records ordered on items 0, then 2.
                mutation[-1] = sorted(mutation[-1], key=iget(0, 2))

                for record in mutation[-1]:
                    lead_cells = ['=""'] * (len(row) - 1)
                    mut_cells = ['=""'] * (len(mutation) - 1)
                    # Only the first record repeats the mutation's own
                    # cells, and only the first mutation repeats the
                    # row's leading cells.
                    if record == mutation[-1][0]:
                        mut_cells = mutation[:-1]
                        if mutation == row[-1][0]:
                            lead_cells = row[:-1]
                    flattened.append(lead_cells + mut_cells + record)

        return flattened
Beispiel #2
0
def to_qcr_format(rec, job, campaign_thresh = 0.7, debug=False):
    """Convert one event record ``rec`` into a list of QCR event dicts.

    One dict is emitted per campaign whose association score exceeds
    ``campaign_thresh``.  ``job`` is accepted but unused in this body.
    NOTE: Python 2 code (print statements, dict.iteritems()/.values()).
    """
    if debug:
        print "Start conv, doing location"
        print "rec['location'] = ", rec['location']

    # Use the highest-weight location entry (if any) as a GeoJSON Point.
    loc = sorted(rec['location'], key=iget('weight'), reverse=True)
    o_loc = None
    if len(loc):
        o_loc = {
            "type": "Point",
            "coordinates": [
                loc[0]["coords"][0]["lng"],
                loc[0]["coords"][0]["lat"]
            ]
        }
    if debug:
        print "Splitting campaigns"
    l_rec = []
    # Keep (campaign_id, score) pairs above the threshold.
    # NOTE(review): assumes each rec['campaigns'] entry is a single-key
    # {id: score} dict -- confirm against the producer of ``rec``.
    camps = filter(lambda x: x is not None, map(lambda x: [y for y in x.iteritems()][0] if x.values()[0] > campaign_thresh else None, rec['campaigns']))
    if debug:
        print "Max campaign association:", max([x.values()[0] for x in rec['campaigns']])
        print "n recs to transform: ", len(camps)
    for camp in camps:
        # Terms sorted by score, highest first; keep only the term text.
        keywords = map(iget(0), sorted(rec['keywords'], key=iget(1), reverse=True))
        hashtags = map(iget(0), sorted(rec['hashtags'], key=iget(1), reverse=True))
        # per QCR: send top kwd if no hashtags
        # NOTE(review): raises IndexError when both lists are empty.
        if not len(hashtags):
            hashtags = [keywords[0]]

        event = {
            'uid': rec['id'],
            'label': rec['hashtags'][0][0] if len(rec['hashtags']) else
                rec['keywords'][0][0] if len(rec['keywords']) else 'None',
            'relevant': True,
            'startDate': datetime.fromtimestamp(rec['start_time_ms']/1000.0).isoformat(),
            'endDate': datetime.fromtimestamp(rec['end_time_ms']/1000.0).isoformat(),
            'hashtags': hashtags,
            'keywords': keywords,
            'urls': rec['urls'],
            'photos': rec['image_urls'],
            'importanceScore': camp[1],
            'topicMessageCount': rec['topic_message_count'],
            'campaignId': camp[0],
            'newsEventIds': [],
            'location': o_loc
        }

        l_rec.append(event)

    return l_rec
def resources_to_contacts():
    """Copy selected Google Calendar Resources into the contacts of
    selected domain users.

    All configuration (selection globs, domain, opt-out URI, undo flag)
    comes from the module-level ``options()`` object.
    NOTE: Python 2 code (``unicode`` builtin).
    """
    # Get Calendar Resources
    calendars = calendar_resource(options=options()).get_resource_feed(
        uri=options().calendar_resource_feed).entry

    # Select Calendars whose email matches the glob in select_pattern
    filtered_calendars = filter(lambda cal: \
        fnmatch(cal.resource_email, options().select_pattern), calendars)

    # Fetch all domain users (paged, 500 per page)
    all_users = exhaust(
        admin(options=options()).users().list,
        dict(domain=options().domain, maxResults=500), 'users')

    # Get opt-out lists (only honored when not undoing)
    optout_emails_set = set() if not options().undo else get_optout_set(
        options().optout_uri)

    # Select domain users by glob, excluding opted-out addresses;
    # filtermap yields each matching user's primaryEmail.
    filtered_users = filtermap(lambda user: fnmatch(user['primaryEmail'], options().user_pattern) and \
                unicode(user['primaryEmail']).lower() not in optout_emails_set,
                iget('primaryEmail'), all_users)

    logging.info(
        'Starting Calendar Resource to Contacts Group copy operation. Selection is "%s" (%d calendar(s)) and target is "%s" (%d user(s))',
        options().select_pattern, len(filtered_calendars),
        options().user_pattern, len(filtered_users))

    process_users(filtered_users, filtered_calendars)
Beispiel #4
0
 def g_apply(operations, source, groupi=None):
     """Apply the group operation ``operations`` (an index selection)
     to ``source`` and return the resulting element list.

     When ``groupi`` is supplied, the result is appended to it unless
     an equal element list is already present.
     """
     picked = list(iget(*operations)(source))
     if groupi is not None:
         if picked not in groupi:
             groupi.append(picked)
     return picked
Beispiel #5
0
def query_results_to_tsv(data):
    """Print facet statistics from ``data`` as UTF-8 TSV lines:
    tag, code, total, and a comma-joined "term (count)" list.
    NOTE: Python 2 code (print statement, str.encode on output).
    """
    # Facet keys look like "<prefix>.<tag>.<code>"; drop the prefix.
    rows = ((key.split('.', 1)[1], stats['total'], stats['terms'])
            for key, stats in data['facets'].items())

    # Largest totals first.
    for key, total, values in sorted(rows, key=iget(1), reverse=True):
        tag, code = key.split('.')
        vals = (u", ".join("%(term)s (%(count)s)" % v for v in values))
        print (u"%s\t%s\t%s\t%s" % (tag, code, total, vals)).encode('utf-8')
Beispiel #6
0
 def sorted_tags():
     """Return definition tags for the enclosing ``symbol``, sorted by
     their ``'tag_path'`` item, optionally with current-file
     definitions first.

     NOTE: Python 2 code (``cmp=`` argument of ``sorted``).
     """
     p_tags = filter(pass_def_filter, tags.get(symbol, []))
     if not p_tags:
         status_message('Can\'t find "%s"' % symbol)
     p_tags = sorted(p_tags, key=iget('tag_path'))
     # Optionally bubble definitions from the current file to the front.
     if setting('definition_current_first', False):
         p_tags = sorted(p_tags, cmp=definition_cmp)
     return p_tags
Beispiel #7
0
 def sorted_tags():
     """Filter and sort the tags recorded for ``symbol``.

     Tags passing ``pass_def_filter`` are ordered by ``'tag_path'``;
     if the ``definition_current_first`` setting is on they are
     re-sorted with ``definition_cmp`` (Python 2: ``cmp=`` keyword).
     """
     p_tags = filter(pass_def_filter, tags.get(symbol, []))
     if not p_tags:
         status_message('Can\'t find "%s"' % symbol)
     p_tags = sorted(p_tags, key=iget('tag_path'))
     if setting('definition_current_first', False):
         p_tags = sorted(p_tags, cmp=definition_cmp)
     return p_tags
Beispiel #8
0
def query_results_to_tsv(data):
    """Dump facet statistics from ``data`` as tab-separated lines:
    tag, code, total, and a comma-joined "term (count)" list.

    NOTE(review): Python 2 intended -- under Python 3 the trailing
    ``.encode('utf-8')`` would be called on ``print``'s None result.
    """
    # Facet keys look like "<prefix>.<tag>.<code>"; strip the prefix.
    rows = ((key.split('.', 1)[1], stats['total'], stats['terms'])
            for key, stats in data['facets'].items())

    # Largest totals first.
    for key, total, values in sorted(rows, key=iget(1), reverse=True):
        tag, code = key.split('.')
        vals = (u", ".join("%(term)s (%(count)s)" % v for v in values))
        print(u"%s\t%s\t%s\t%s" % (tag, code, total, vals)).encode('utf-8')
def find_ellipses(centroids, clusters):
    """
        Returns:
            [(centroid_id,
                ((x_mean, x_std, x_density_normalized),
                 (y_mean, y_std, y_density_normalized))
            )]
    """
    # Fits a normal distribution to each cluster's x and y coordinates
    # and normalizes the density at the mean against all clusters.
    # NOTE: Python 2 code (dict.iteritems).
    # NOTE(review): ``centroids`` is never used -- confirm intent.

    c_density = dict()
    dmx, dmy = list(), list()
    # Rows of ``clusters`` look like (x, y, centroid_id); groupby needs
    # the preceding sort on the same key (item 2).
    for (c, members) in groupby(sorted(clusters, key=iget(2)), iget(2)):
        xs, ys, _ = zip(*members)

        # ignore outliers: a singleton cluster gives no spread to fit
        if len(xs) == 1:
            continue

        # fitting data
        ((xmean, xstd), (ymean, ystd)) = (norm.fit(xs), norm.fit(ys))

        # compute density value (y) in mean point
        probx = normpdf([xmean], xmean, xstd)
        proby = normpdf([ymean], ymean, ystd)
        dmx.append(probx)
        dmy.append(proby)

        # Save clusters mean and std
        c_density[c] = ((xmean, xstd, probx), (ymean, ystd, proby))

    # Compute dataset mean and std in mean points
    xm = (np.nanmean(dmx), np.nanstd(dmx))
    ym = (np.nanmean(dmy), np.nanstd(dmy))

    # Inject normalized density (absolute z-score of the mean density)
    return list((c, ((xmean, xstd, fabs(probx - xm[0]) / xm[1]),
                     (ymean, ystd, fabs(proby - ym[0]) / ym[1])))
                for (c, ((xmean, xstd, probx),
                         (ymean, ystd, proby))) in c_density.iteritems())
Beispiel #10
0
def write_games_csv(data, fieldnames, filename='game_stats.csv'):
    """Write the game overview dicts from ``data`` to a CSV file.

    Args:
        data: mapping with a ``'games'`` dict of game-id -> game dict;
            also passed through to ``get_winner_race``.
        fieldnames: column names for ``csv.DictWriter``.
        filename: output path (default ``'game_stats.csv'``).

    Side effect: each game dict gains/overwrites a ``'race'`` key
    (looked up via ``get_winner_race``) before being written.
    """
    # The keys of data['games'] were never used; take the values
    # directly instead of looping over .items() and appending.
    gamelist = sorted(data['games'].values(), key=iget('datecreated'))

    # newline='' is required so csv controls line endings itself.
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for game in gamelist:
            game['race'] = get_winner_race(data, game['id'], game['winner'])
            writer.writerow(game)
    def g_apply(operations, source, groupi=None):
        """Act on ``source`` with the symmetry-group operation given by
        ``operations`` and record unique results.

        Args:
            operations (list): The symmetry group operation (indices).
            source (list): Object for the group to act on.
            groupi (list, optional): The growing symmetry group; the
                outcome is appended when not already a member.

        Returns:
            list: The result of applying the operation to ``source``.
        """
        outcome = list(iget(*operations)(source))
        is_new = groupi is not None and outcome not in groupi
        if is_new:
            groupi.append(outcome)
        return outcome
    def g_apply(operations, source, groupi=None):
        """Apply a symmetry-group operation to ``source``.

        Args:
            operations (list): Index tuple describing the operation.
            source (list): The object the group acts on.
            groupi (list, optional): Accumulated group; unique results
                are appended to it.  Defaults to None.

        Returns:
            list: ``source`` rearranged by ``operations``.
        """
        operation = iget(*operations)
        result = list(operation(source))
        if groupi is None:
            return result
        if result not in groupi:
            groupi.append(result)
        return result
def resources_to_contacts():
    """Copy Calendar Resources matching ``select_pattern`` into the
    contacts of domain users matching ``user_pattern``.

    Configuration comes entirely from the module-level ``options()``
    object.  NOTE: Python 2 code (``unicode`` builtin).
    """
    # Get Calendar Resources
    calendars = calendar_resource(options=options()).get_resource_feed(uri=options().calendar_resource_feed).entry

    # Select Calendars by options (glob match on the resource email)
    filtered_calendars = filter(lambda cal: \
        fnmatch(cal.resource_email, options().select_pattern), calendars)

    # Fetch all domain users (paged, 500 per page)
    all_users = exhaust(admin(options=options()).users().list, dict(domain=options().domain, maxResults=500), 'users')

    # Get opt-out lists (only honored when not undoing)
    optout_emails_set = set() if not options().undo else get_optout_set(options().optout_uri)

    # Select domain users by glob, excluding opted-out addresses;
    # filtermap yields each matching user's primaryEmail.
    filtered_users = filtermap(lambda user: fnmatch(user['primaryEmail'], options().user_pattern) and \
                unicode(user['primaryEmail']).lower() not in optout_emails_set,
                iget('primaryEmail'), all_users)

    logging.info('Starting Calendar Resource to Contacts Group copy operation. Selection is "%s" (%d calendar(s)) and target is "%s" (%d user(s))',
        options().select_pattern, len(filtered_calendars), options().user_pattern, len(filtered_users))

    process_users(filtered_users, filtered_calendars)
Beispiel #14
0
 def sorted_tags():
     """Return every tag from all keys of the closed-over ``tags``
     mapping, ordered by the tag's ``'tag_path'`` item."""
     everything = chain.from_iterable(tags[k] for k in tags)
     return sorted(everything, key=iget('tag_path'))
Beispiel #15
0
def to_qcr_format(rec, campaign_thresh=0.7, debug=False):
    """Convert one event record ``rec`` into a list of QCR event dicts,
    one per campaign whose association score exceeds ``campaign_thresh``.
    NOTE: Python 2 code (print statements, dict.iteritems()/.values()).
    """
    if debug:
        print "Start conv, doing location"
        print "rec['location'] = ", rec['location']

    # Use the highest-weight location entry (if any) as a GeoJSON Point.
    loc = sorted(rec['location'], key=iget('weight'), reverse=True)
    o_loc = None
    if len(loc) > 0:
        o_loc = {
            "type": "Point",
            "coordinates":
            [loc[0]["coords"][0]["lng"], loc[0]["coords"][0]["lat"]]
        }
    if debug:
        print "Splitting campaigns"
    l_rec = []
    # Keep (campaign_id, score) pairs above the threshold.
    # NOTE(review): assumes each rec['campaigns'] entry is a single-key
    # {id: score} dict -- confirm against the producer of ``rec``.
    camps = filter(
        lambda x: x is not None,
        map(
            lambda x: [y for y in x.iteritems()][0]
            if x.values()[0] > campaign_thresh else None, rec['campaigns']))
    if debug:
        print "Max campaign association:", max(
            [x.values()[0] for x in rec['campaigns']])
        print "n recs to transform: ", len(camps)
    for camp in camps:
        # Terms sorted by score, highest first; keep only the term text.
        keywords = map(iget(0),
                       sorted(rec['keywords'], key=iget(1), reverse=True))
        hashtags = map(iget(0),
                       sorted(rec['hashtags'], key=iget(1), reverse=True))

        l_rec.append({
            'uid':
            rec['id'],
            'label':
            rec['hashtags'][0] if len(rec['hashtags']) > 0 else 'None',
            'startDate':
            datetime.fromtimestamp(rec['start_time_ms'] / 1000.0).isoformat(),
            'endDate':
            datetime.fromtimestamp(rec['end_time_ms'] / 1000.0).isoformat(),
            'domains':
            rec['domains'],
            'hashtags':
            hashtags,
            'keywords':
            keywords,
            'urls':
            rec['urls'],
            'photos':
            rec['image_urls'],
            'importanceScore':
            camp[1],
            'topicMessageCount':
            rec['topic_message_count'],
            'campaignId':
            camp[0],
            'newsEventIds': [],
            'location':
            o_loc
        })
    return l_rec
Beispiel #16
0
 def sorted_tags():
     """Collect the tags of every key in ``tags`` and sort them by
     their ``'tag_path'`` item."""
     merged = []
     for k in tags:
         merged.extend(tags[k])
     return sorted(merged, key=iget('tag_path'))
Beispiel #17
0
 def sorted_tags():
     """Definition tags for ``symbol``, filtered and path-sorted.

     Shows a status message when nothing passes the filter.
     """
     candidates = [t for t in tags.get(symbol, []) if pass_def_filter(t)]
     if not candidates:
         status_message('Can\'t find "%s"' % symbol)
     return sorted(candidates, key=iget('tag_path'))
Beispiel #18
0
 def sorted_tags():
     """Return all tags from every value of the closed-over ``tags``
     mapping, sorted by their ``'tag_path'`` item.
     NOTE: Python 2 code (``dict.itervalues()``)."""
     return sorted (
         chain.from_iterable(tags.itervalues()), key=iget('tag_path'))
Beispiel #19
0
 def reload (self):
     """Refresh the widget text to list every format flag that is on."""
     enabled = [name for name, on in self.formats.items() if on]
     self.set_text(' '.join(enabled))
Beispiel #20
0
 def sorted_tags():
     """Look up definition tags for ``symbol`` and sort them by path.

     Emits a "can't find" status message when the filter removes all.
     """
     found = list(filter(pass_def_filter, tags.get(symbol, [])))
     if not found:
         status_message('Can\'t find "%s"' % symbol)
     found.sort(key=iget('tag_path'))
     return found
Beispiel #21
0
 def reload(self):
     """Show the names of all enabled formats, space-separated."""
     names = (entry[0] for entry in self.formats.items() if entry[1])
     self.set_text(' '.join(names))
Beispiel #22
0
    def get_communities(self):
        """Partition ``self.graph`` with Louvain community detection and
        build one summary dict per community.

        Returns:
            dict: community label -> summary holding id, time bounds,
            cluster ids, hashtag/keyword counts, campaigns, urls,
            image urls, location and message counts, post-processed
            into plain lists for easy JavaScript consumption.

        NOTE: Python 2 code (print statements, dict.iteritems()).
        """
        partition = community.best_partition(self.graph)
        d1 = {}

        print "Communities found, getting event summary information"
        # Progress logging: announce when each fraction of nodes is done.
        n_nodes = len(self.graph.nodes())
        checkpoints = [.1, .25, .5, .75, .9, .95, .99, 1.1]
        ind_checked = 0
        n_checked = 0
        for n in self.graph.nodes():
            n_checked += 1
            while n_checked > checkpoints[ind_checked] * n_nodes:
                ind_checked += 1
                print "Finished {}% of nodes".format(
                    checkpoints[ind_checked - 1] * 100)

            images = set()
            com = str(partition[n])
            if n not in self.nodes_detailed:
                print "{} not found in detailed node list...why????".format(n)
                continue
            clust = self.nodes_detailed[n]
            # Either extend the existing community summary or create one.
            if com in d1:
                d1[com]['cluster_ids'].append(n)
                d1[com]['topic_message_count'] += len(
                    clust['similar_post_ids'])
            else:
                d1[com] = {
                    'id': str(uuid.uuid4()),
                    'name': 'default',
                    'start_time_ms': clust['start_time_ms'],
                    'end_time_ms': clust['end_time_ms'],
                    'cluster_ids': [n],
                    'hashtags': {},
                    'keywords': {},
                    'campaigns': {
                        "total": 0,
                        'ids': {}
                    },
                    'urls': set([]),
                    'image_urls': [],
                    'location': {},
                    'importance_score': 1.0,
                    'topic_message_count': len(clust['similar_post_ids'])
                }

            # Expand summary data (hashtags, keywords, images, urls, geo)
            if clust['data_type'] == 'hashtag':
                d1[com]['hashtags'][clust['term']] = len(
                    clust['similar_post_ids'])
                # Add full text analysis; many communities have no
                # image/text nodes of their own.
                self.get_text_sum(clust, d1[com])
            elif clust['data_type'] == 'image':
                pass
            elif clust['data_type'] == 'text':
                self.get_text_sum(clust, d1[com])

            images |= self.get_img_sum(clust)

            d1[com]['image_urls'] = list(set(d1[com]['image_urls']) | images)

            # Widen the community time window to cover this cluster.
            if clust['start_time_ms'] < d1[com]['start_time_ms']:
                d1[com]['start_time_ms'] = clust['start_time_ms']
            if clust['end_time_ms'] > d1[com]['end_time_ms']:
                d1[com]['end_time_ms'] = clust['end_time_ms']

        print "Information collected, formatting output"

        # Cleanup -> transform dicts to ordered lists and sets to lists
        # for easy JavaScript consumption.
        for com in d1.keys():
            # Campaign counts -> list of {id: fraction-of-total} dicts.
            l_camps = []
            if d1[com]['campaigns']['total'] != 0:
                l_camps = [{
                    k: 1. * v / float(d1[com]['campaigns']['total'])
                } for k, v in d1[com]['campaigns']['ids'].iteritems()]

            d1[com]['campaigns'] = l_camps

            # Hashtag counts -> top-100 (tag, count) pairs, highest first.
            l_tags = sorted(list(d1[com]['hashtags'].iteritems()),
                            key=iget(1),
                            reverse=1)
            d1[com]['hashtags'] = l_tags[:100]  # slice

            # Keyword counts -> top-100 (term, count) pairs, highest first.
            l_terms = sorted(list(d1[com]['keywords'].iteritems()),
                             key=iget(1),
                             reverse=1)
            d1[com]['keywords'] = l_terms[:100]  # slice

            d1[com]['urls'] = list(d1[com]['urls'])

            # Location dict -> list of dicts, each tagged with its label.
            temp = []
            for k, v in d1[com]['location'].iteritems():
                dt = v
                dt['label'] = k
                temp.append(dt)
            d1[com]['location'] = temp

        return d1
Beispiel #23
0
def CreateDemands(M):
    """
	Steps to create the demand distributions
	1. Use Demand keys to ensure that all demands in commodity_demand are used
	2. Find any slices not set in DemandDefaultDistribution, and set them based
	on the associated SegFrac slice.
	3. Validate that the DemandDefaultDistribution sums to 1.
	4. Find any per-demand DemandSpecificDistribution values not set, and set
	set them from DemandDefaultDistribution.  Note that this only sets a
	distribution for an end-use demand if the user has *not* specified _any_
	anything for that end-use demand.  Thus, it is up to the user to fully
	specify the distribution, or not.  No in-between.
	 5. Validate that the per-demand distributions sum to 1.
	"""
    # NOTE: Python 2 / Pyomo code (sparse_iterkeys, itervalues, SE).

    # Step 0: some setup for a couple of reusable items

    # iget(2): 2 = magic number to specify the third column.  Currently the
    # demand in the tuple (s, d, dem)
    DSD_dem_getter = iget(2)

    # Step 1: warn about demand commodities never referenced by M.Demand.
    used_dems = set(dem for p, dem in M.Demand.sparse_iterkeys())
    unused_dems = sorted(M.commodity_demand.difference(used_dems))
    if unused_dems:
        for dem in unused_dems:
            msg = ("Warning: Demand '{}' is unused\n")
            SE.write(msg.format(dem))

    # Step 2
    DDD = M.DemandDefaultDistribution  # Shorter, for us lazy programmer types
    unset_defaults = set(M.SegFrac.sparse_iterkeys())
    unset_defaults.difference_update(DDD.sparse_iterkeys())
    if unset_defaults:
        # Some hackery because Pyomo thinks that this Param is constructed.
        # However, in our view, it is not yet, because we're specifically
        # targeting values that have not yet been constructed, that we know are
        # valid, and that we will need.
        # DDD._constructed = False
        for tslice in unset_defaults:
            DDD[tslice] = M.SegFrac[tslice]
        # DDD._constructed = True

    # Step 3: the default distribution must total 1 (within tolerance).
    total = sum(i for i in DDD.itervalues())
    if abs(value(total) - 1.0) > 0.001:
        # We can't explicitly test for "!= 1.0" because of incremental rounding
        # errors associated with the specification of demand shares by time slice,
        # but we check to make sure it is within the specified tolerance.

        key_padding = max(map(get_str_padding, DDD.sparse_iterkeys()))

        # NOTE(review): ``format`` shadows the builtin of the same name.
        format = "%%-%ds = %%s" % key_padding
        # Works out to something like "%-25s = %s"

        items = sorted(DDD.items())
        items = '\n   '.join(format % (str(k), v) for k, v in items)

        msg = (
            'The values of the DemandDefaultDistribution parameter do not '
            'sum to 1.  The DemandDefaultDistribution specifies how end-use '
            'demands are distributed among the time slices (i.e., time_season, '
            'time_of_day), so together, the data must total to 1.  Current '
            'values:\n   {}\n\tsum = {}')

        raise Exception(msg.format(items, total))

    # Step 4
    DSD = M.DemandSpecificDistribution

    # Demands with any per-demand distribution already set by the user.
    demands_specified = set(
        map(DSD_dem_getter, (i for i in DSD.sparse_iterkeys())))
    unset_demand_distributions = used_dems.difference(demands_specified)
    unset_distributions = set(
        cross_product(M.time_season, M.time_of_day,
                      unset_demand_distributions))

    if unset_distributions:
        # Some hackery because Pyomo thinks that this Param is constructed.
        # However, in our view, it is not yet, because we're specifically
        # targeting values that have not yet been constructed, that we know are
        # valid, and that we will need.
        # DSD._constructed = False
        for s, d, dem in unset_distributions:
            DSD[s, d, dem] = DDD[s, d]
        # DSD._constructed = True

    # Step 5: each per-demand distribution must also total 1.
    for dem in used_dems:
        keys = (k for k in DSD.sparse_iterkeys() if DSD_dem_getter(k) == dem)
        total = sum(DSD[i] for i in keys)

        if abs(value(total) - 1.0) > 0.001:
            # We can't explicitly test for "!= 1.0" because of incremental rounding
            # errors associated with the specification of demand shares by time slice,
            # but we check to make sure it is within the specified tolerance.

            keys = [
                k for k in DSD.sparse_iterkeys() if DSD_dem_getter(k) == dem
            ]
            key_padding = max(map(get_str_padding, keys))

            format = "%%-%ds = %%s" % key_padding
            # Works out to something like "%-25s = %s"

            items = sorted((k, DSD[k]) for k in keys)
            items = '\n   '.join(format % (str(k), v) for k, v in items)

            msg = (
                'The values of the DemandSpecificDistribution parameter do not '
                'sum to 1.  The DemandSpecificDistribution specifies how end-use '
                'demands are distributed per time-slice (i.e., time_season, '
                'time_of_day).  Within each end-use Demand, then, the distribution '
                'must total to 1.\n\n   Demand-specific distribution in error: '
                ' {}\n\n   {}\n\tsum = {}')

            raise Exception(msg.format(dem, items, total))
Beispiel #24
0
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
"""
Project Euler - Problem 14
Longest Collatz sequence
"""

from operator import itemgetter as iget


def collatz(nums):
    """Return a list ``d`` where ``d[n]`` is the Collatz chain length
    of ``n`` (number of terms until 1, inclusive), for 0 <= n <= nums.

    Memoized: once a trajectory drops below its starting value the
    remaining length has already been computed and is reused.
    """
    lengths = [1] * (nums + 1)
    for start in range(2, nums + 1):
        steps, current = 0, start
        while current >= start:
            if current % 2:
                current = 3 * current + 1
            else:
                current = current // 2
            steps += 1
        lengths[start] = steps + lengths[current]
    return lengths


# Report the (n, chain_length) pair with the longest Collatz chain for
# all starting values below one million (Project Euler #14).
print(max(enumerate(collatz(1000000)), key=iget(1)))
Beispiel #25
0
def print_summary(data, known=None):
    """Print a per-message-type summary of fields and subfields.
    NOTE: Python 2 code (print statements).
    """
    # ``packaged`` yields (message_type, fields) pairs; each field is a
    # (name, subfields) pair.  Fields without subfields print "(FIXED)".
    for mtype, fields in packaged(data, known):
        print '#', mtype
        for field, subfields in sorted(fields, key=iget(0)):
            print field, ", ".join(subfields) if subfields else "(FIXED)"
        print
Beispiel #26
0
 def sorted_tags():
     """Tags recorded for ``symbol``, ordered by their 'tag_path' item."""
     symbol_tags = tags.get(symbol, [])
     return sorted(symbol_tags, key=iget('tag_path'))
Beispiel #27
0
def print_summary(data, known=None):
    """Write a human-readable field summary for each message type to
    stdout.  NOTE: Python 2 code (print statements).
    """
    # Each entry from ``packaged`` is (message_type, fields); a field
    # with no subfields is fixed-format, hence "(FIXED)".
    for mtype, fields in packaged(data, known):
        print '#', mtype
        for field, subfields in sorted(fields, key=iget(0)):
            print field, ", ".join(subfields) if subfields else "(FIXED)"
        print