def soap_getDataSources(self, ps, **kw):
    c.log( 'Starting' )
    request, response = RobustCommunityAnalysis \
                            .soap_getDataSources(self, ps, **kw)
    # Get arguments
    accessKey = request.get_element_accessKey()
    if not checkKey(accessKey):
        raise Exception("Your accessKey is not valid.")
    # Compute the available datasources
    response = getDataSourcesResponse()
    datasources = []
    for id, dsrc in DATASOURCES.iteritems():
        datasource = response.new_datasources().new_datasource()
        datasource.set_element_id(id)
        datasource.set_element_title(dsrc['title'])
        datasources += [ datasource ]
    _datasources = response.new_datasources()
    _datasources.set_element_datasource( datasources )
    response.set_element_datasources( _datasources )
    # Return response
    c.log( 'Finished' )
    return request, response
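The lookups above (and the DATASOURCES[dataSourceId]['name'] lookups in the handlers further down) suggest a registry shaped roughly as below. This is a sketch, not the project's actual table: the ids, titles and names are invented for illustration.

# Hypothetical sketch of the DATASOURCES registry: keys are datasource
# ids, 'title' is what clients see, and 'name' selects the section that
# ds.build_conf() reads from DATASOURCES_CONF.
DATASOURCES = {
    1: {'title': 'Example forum',        'name': 'example_forum'},
    2: {'title': 'Example mailing list', 'name': 'example_list'},
}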
def store_cache( dsconf, object, data, arguments=() ):
    """Stores an object into cache."""
    name = get_cache_name( dsconf, object, arguments )
    file = open(dsconf['cache_dir']+'/'+name, 'wb')
    cPickle.dump(data, file, CACHE_PROTOCOL)
    file.close()
    c.log( "Stored '{0}' into cache.".format(name) )
def soap_getIndicators(self, ps, **kw):
    request, response = RobustCommunityAnalysis \
                            .soap_getIndicators(self, ps, **kw)
    # Get arguments
    accessKey = request.get_element_accessKey()
    if not checkKey(accessKey):  # validate the key, as the other handlers do
        raise Exception("Your accessKey is not valid.")
    dataSourceId = request.get_element_dataSourceId()
    communityId = request.get_element_communityId()
    c.log( 'Starting for dataSourceId {} and communityId {}' \
               .format(dataSourceId, communityId) )
    # Return available indicators mapping
    response = getIndicatorsResponse()
    indicators = []
    for id, ind in INDICATORS.iteritems():
        indicator = response.new_indicators().new_indicator()
        indicator.set_element_id( id )
        indicator.set_element_title( ind['title'] )
        indicators += [ indicator ]
    _indicators = response.new_indicators()
    _indicators.set_element_indicator( indicators )
    response.set_element_indicators( _indicators )
    # Return response
    c.log( 'Finished' )
    return request, response
def get_communities(dsname):
    """Use datasource get_communities() facility."""
    c.log( 'Starting' )
    # Build the datasource configuration
    dsconfig = ds.build_conf( dsname, DATASOURCES_CONF )
    # Get the cache directory
    config = ConfigParser.RawConfigParser()
    config.read(GENERAL_CONF)
    dsconfig['cache_dir'] = config.get('paths','cache_dir')
    communities = ds.get_communities( dsconfig )
    fields = ds.fields('communities')
    for community in communities:
        id = community[fields['id']]
        title = (community[fields['title']], '(no title)')\
                [community[fields['title']] is None]
        start_date = (community[fields['start_date']], '(no start date)')\
                     [community[fields['start_date']] is None]
        end_date = (community[fields['end_date']], '(no end date)')\
                   [community[fields['end_date']] is None]
        print '{0}\t{1}\t{2}\t{3}'.format( id, title.encode('UTF-8'),
                                           start_date, end_date )
    c.log( 'Finished' )
def in_cache( dsconf, object, arguments=() ):
    """Checks if an object is in cache."""
    name = get_cache_name( dsconf, object, arguments )
    if os.path.isfile(dsconf['cache_dir']+'/'+name):
        c.log( "'{0}' is in cache.".format(name) )
        return True
    c.log( "'{0}' is not in cache.".format(name) )
    return False
def get_cache( dsconf, object, arguments=() ):
    """Gets an object from cache."""
    name = get_cache_name( dsconf, object, arguments )
    file = open(dsconf['cache_dir']+'/'+name, 'rb')
    data = cPickle.load(file)
    file.close()
    c.log( "Got '{0}' from cache.".format(name) )
    return data
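A minimal round-trip through the three cache helpers above, assuming the module-level get_cache_name() they all reference (defined elsewhere in this module); the dsconf and the payload are invented for illustration:

# Hypothetical usage of the cache helpers; values are made up, and
# cPickle/os must already be imported by this module.
dsconf = {'cache_dir': '/tmp/robust-cache', 'caching': True}
payload = ((1, 'alice'), (2, 'bob'))
store_cache( dsconf, 'users', payload )        # pickles into cache_dir
if in_cache( dsconf, 'users' ):                # cache file present?
    assert get_cache( dsconf, 'users' ) == payload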
def compute( dsconf, community, start, end ):
    """Compute community out-degrees."""
    c.log( 'Starting' )
    outdegrees = collections.defaultdict(long)
    # Get all messages and replies for the community
    messages = ds.get_messages(dsconf, start, end, community)
    # Get out-degrees
    for message in messages:
        outdegrees[message[ds.field('messages','user_id')]] += 1
    # Return the out-degrees
    c.log( 'Finished' )
    return outdegrees
def get_conversations( dsconf, community=None ):
    """get_conversations() facility wrapper."""
    # Results: tuple of (id, community_id) tuples
    c.log( 'Starting' )
    if dsconf['caching'] and in_cache(dsconf,'conversations',(community,)):
        results = get_cache( dsconf, 'conversations', (community,) )
    else:
        results = get_engine( dsconf ) \
                      .get_source( dsconf ) \
                      .get_conversations( dsconf, community )
        if dsconf['caching']:
            store_cache( dsconf, 'conversations', results, (community,) )
    c.log( '{0} conversation(s) found.'.format(len(results)) )
    return results
def get_communities( dsconf ):
    """get_communities() facility wrapper."""
    # Results: tuple of (id, title, start_date, end_date) tuples
    c.log( 'Starting' )
    if dsconf['caching'] and in_cache( dsconf, 'communities' ):
        results = get_cache( dsconf, 'communities' )
    else:
        results = get_engine( dsconf ) \
                      .get_source( dsconf ) \
                      .get_communities( dsconf )
        if dsconf['caching']:
            store_cache( dsconf, 'communities', results )
    c.log( '{0} community(ies) found.'.format(len(results)) )
    return results
def get_users( dsconf ):
    """get_users() facility wrapper."""
    # Results: tuple of (id,) tuples
    c.log( 'Starting' )
    if dsconf['caching'] and in_cache( dsconf, 'users' ):
        results = get_cache( dsconf, 'users' )
    else:
        results = get_engine( dsconf ) \
                      .get_source( dsconf ) \
                      .get_users( dsconf )
        if dsconf['caching']:
            store_cache( dsconf, 'users', results )
    c.log( '{0} user(s) found.'.format(len(results)) )
    return results
def checkKey( key ):
    """Check if given key is allowed to proceed."""
    # Read the access keys configuration
    config = ConfigParser.RawConfigParser()
    config.read(WEBSERVICE_CONF)
    accesskeys = config.options('accesskeys')
    # Check key
    if key in accesskeys:
        c.log( "Allowed key {key} ({description}).".format(
                   key=key, description=config.get('accesskeys',key)) )
        return True
    # No key
    c.log( "DENIED key {key}.".format(key=key) )
    return False
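checkKey() implies an INI layout for WEBSERVICE_CONF where the option names under [accesskeys] are the allowed keys and the values are free-text descriptions. The entry below is invented; note also that RawConfigParser lowercases option names by default, so mixed-case keys would never match.

; Hypothetical WEBSERVICE_CONF excerpt -- real keys and descriptions differ
[accesskeys]
0123456789abcdef = Example partner access key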
def get_messages( dsconf, start, end, community=None ):
    """get_messages() facility wrapper."""
    # Results: tuple of (id, user_id, conversation_id, datetime,
    #                    target_id, target_datetime) tuples
    c.log( 'Starting' )
    if dsconf['caching'] and in_cache(dsconf,'messages',(start,end,community)):
        results = get_cache( dsconf, 'messages', (start,end,community) )
    else:
        results = get_engine( dsconf ) \
                      .get_source( dsconf ) \
                      .get_messages( dsconf, start, end, community )
        if dsconf['caching']:
            store_cache( dsconf, 'messages', results, (start,end,community) )
    c.log( '{0} message(s) found.'.format(len(results)) )
    return results
def compute(dsconf, community, start, end):
    """Compute community in-degrees."""
    c.log("Starting")
    indegrees = collections.defaultdict(long)
    # Get all messages and replies for the community
    messages = ds.get_messages(dsconf, start, end, community)
    messages = common.build_msgmap(messages, ds.field("messages", "id"))
    # Get in-degrees: each reply credits the author of the message it
    # replies to (the id-keyed map above makes that lookup possible)
    for (id, message) in messages.iteritems():
        target_id = message[0][ds.field("messages", "target_id")]
        if target_id is not None and target_id in messages:
            indegrees[messages[target_id][0]
                      [ds.field("messages", "user_id")]] += 1
    # Return the in-degrees
    c.log("Finished")
    return indegrees
def main():
    """Main function."""
    # Process server arguments
    parser = argparse.ArgumentParser(description="WebService server.")
    parser.add_argument(
        "-l", "--loglevel",
        help="loglevel (DEBUG,WARN)",
        metavar="LOGLEVEL"
    )
    parser.add_argument(
        "-a", "--address",
        help="IP address for binding (default: 127.0.0.1)",
        metavar="ADDRESS",
        default="127.0.0.1"
    )
    parser.add_argument(
        "-p", "--port",
        help="HTTP port to use (default: 8080)",
        metavar="PORT",
        default=8080,
        type=int
    )
    arguments = parser.parse_args()
    # Set the log level (getattr avoids eval'ing user-supplied input)
    if arguments.loglevel:
        loglevel = getattr(logging, arguments.loglevel)
        logger = logging.getLogger("")
        logger.setLevel(loglevel)
    # Print ZSI installed version
    c.log( 'Starting' )
    c.log( 'ZSI version: {0}'.format(zsiversion) )
    # Run the server (use address='' for binding to all interfaces)
    c.log( 'Waiting for requests ...' )
    ServiceContainer.AsServer(
        address=arguments.address,
        port=arguments.port,
        services=[RobustCommunityAnalysisImpl(),]
    )
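Given the argparse options above, a typical launch could look like this (the script name is hypothetical; by default the server binds to 127.0.0.1:8080):

$ python server.py --address 0.0.0.0 --port 8080 --loglevel DEBUG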
def build_replies( messages, fields ):
    """Compute the replies tuples given user-mapped 'messages'

    Usage: {userid: set((pstx, psty, txy))} = build_replies( messages, fields )
    where: pstx = message id that was replied to by 'psty',
           psty = message id that is a reply to 'pstx',
           txy  = time delay between 'pstx' and 'psty'"""
    c.log( 'Starting' )
    replies = collections.defaultdict(set)
    # Any messages or users?
    if not messages:
        c.log( 'No messages or users given -- nothing to do.' )
        return replies
    # Compute each user reply tuples (pstx,psty,txy)
    for user in messages.keys():
        for message in messages[user]:
            if ( message[fields['target_id']] is not None ) and \
               ( message[fields['target_datetime']] is not None ):
                replies[user].add((
                    message[fields['target_id']],
                    message[fields['id']],
                    message[fields['datetime']] - \
                        message[fields['target_datetime']] ))
    # Return reply tuples
    c.log( '{0} user(s) with replies found.'.format(len(replies)) )
    return replies
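A small hand-built sketch of build_replies(); the field offsets below are invented stand-ins for whatever ds.fields('messages') actually returns, following the (id, user_id, conversation_id, datetime, target_id, target_datetime) tuple layout documented above:

import datetime as dt
# Hypothetical field offsets -- real ones come from ds.fields('messages').
fields = {'id': 0, 'user_id': 1, 'conversation_id': 2,
          'datetime': 3, 'target_id': 4, 'target_datetime': 5}
t0 = dt.datetime(2012, 1, 1, 12, 0, 0)
msgs = {
    'alice': [(1, 'alice', 10, t0, None, None)],                  # original post
    'bob':   [(2, 'bob', 10, t0 + dt.timedelta(hours=2), 1, t0)], # reply to 1
}
# alice made no replies, so only bob appears:
# -> {'bob': set([(1, 2, datetime.timedelta(0, 7200))])}
print build_replies( msgs, fields )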
def soap_getCommunities(self, ps, **kw):
    request, response = RobustCommunityAnalysis \
                            .soap_getCommunities(self, ps, **kw)
    # Get arguments
    accessKey = request.get_element_accessKey()
    if not checkKey(accessKey):
        raise Exception("Your accessKey is not valid.")
    dataSourceId = request.get_element_dataSourceId()
    c.log( 'Starting for dataSourceId {}'.format(dataSourceId) )
    # Build the datasource configuration
    dsconfig = ds.build_conf( DATASOURCES[dataSourceId]['name'],
                              DATASOURCES_CONF )
    # Get the cache directory
    config = ConfigParser.RawConfigParser()
    config.read(GENERAL_CONF)
    dsconfig['cache_dir'] = config.get('paths','cache_dir')
    # Compute the available communities
    COMMUNITIES = ds.get_communities(dsconfig)
    response = getCommunitiesResponse()
    communities = []
    for id, title, startDate, endDate in COMMUNITIES:
        if startDate is None or endDate is None:
            continue
        community = response.new_communities().new_community()
        community.set_element_id(id)
        community.set_element_title(title)
        community.set_element_startDate( startDate.timetuple() )
        community.set_element_endDate( endDate.timetuple() )
        communities += [ community ]
    _communities = response.new_communities()
    _communities.set_element_community( communities )
    response.set_element_communities( _communities )
    # Return response
    c.log( 'Finished' )
    return request, response
def compute( dsconf, community, start, end ):
    """Compute community reciprocities with 'seconds' resolution."""
    c.log( 'Starting' )
    reciprocities = collections.defaultdict(float)
    # Get all messages and replies for 'users'
    messages = ds.get_messages(dsconf, start, end, community)
    messages = common.build_msgmap(messages, ds.field('messages','user_id'))
    replies = common.build_replies(messages, ds.fields('messages'))
    # Get requested users reciprocities
    c.log( 'Computing reciprocities ...' )
    for (user,user_messages) in messages.iteritems():
        # Calculate the sum of reply delays
        delays_sum = dt.timedelta( 0 )
        for reply in replies[user]:
            if reply[2].total_seconds() >= 0:   # Ignore negative delay replies
                delays_sum += reply[2]          # txy component
        # Calculate reciprocity
        if not replies[user]:
            reciprocities[user] = float('inf')
        else:
            reciprocities[user] = delays_sum.total_seconds() * 1.0 / \
                                  len( replies[user] )
    # Return users reciprocities
    c.log( 'Finished' )
    return reciprocities
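Continuing the hand-built example under build_replies() above: bob made one reply with a two-hour delay, so his reciprocity would be 7200.0 / 1 = 7200.0 seconds; alice made none, so hers is float('inf'). Reciprocity here is the mean reply delay, so lower scores mean faster repliers.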
def getIndicatorScore( indicatorId, dataSourceId, communityId,
                       startDate, endDate ):
    """Computes the indicator health score."""
    c.log( 'Starting' )
    # Build the datasource configuration
    dsconfig = ds.build_conf( DATASOURCES[dataSourceId]['name'],
                              DATASOURCES_CONF )
    # Get the cache directory
    config = ConfigParser.RawConfigParser()
    config.read(GENERAL_CONF)
    dsconfig['cache_dir'] = config.get('paths','cache_dir')
    # Prepare score parameters
    feature = INDICATORS[indicatorId]['feature']
    aggregation = INDICATORS[indicatorId]['aggregation']
    consolidation = INDICATORS[indicatorId]['consolidation'][dataSourceId]
    score = consolidation.compute(aggregation.compute(feature.compute(
        dsconfig, communityId, startDate, endDate )))
    c.log( 'Finished' )
    return score
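The subscripting in getIndicatorScore() and soap_getIndicators() suggests an INDICATORS registry shaped roughly like this. Every id, title and module below is invented; each referenced module is assumed to expose a compute() like the feature, aggregation and consolidation modules elsewhere in this codebase:

# Hypothetical INDICATORS registry sketch -- illustrative names only.
INDICATORS = {
    1: {
        'title':       'Activity',
        'feature':     features.activity,   # .compute(dsconf, community, start, end)
        'aggregation': aggregations.mean,   # .compute(values)
        'consolidation': {                  # keyed per datasource id
            1: consolidations.linear,       # .compute(value)
        },
    },
}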
def soap_getHealthScore(self, ps, **kw):
    request, response = RobustCommunityAnalysis \
                            .soap_getHealthScore(self, ps, **kw)
    # Get arguments
    accessKey = request.get_element_accessKey()
    if not checkKey(accessKey):
        raise Exception("Your accessKey is not valid.")
    dataSourceId = request.get_element_dataSourceId()
    indicatorId = request.get_element_indicatorId()
    communityId = request.get_element_communityId()
    startDate = request.get_element_startDate()
    startDate = dt.datetime(startDate[0], startDate[1], startDate[2],
                            startDate[3], startDate[4], startDate[5])
    endDate = request.get_element_endDate()
    endDate = dt.datetime(endDate[0], endDate[1], endDate[2],
                          endDate[3], endDate[4], endDate[5])
    c.log( ('Starting for dataSourceId {}, indicatorId {}, communityId {}, ' +
            'startDate {} and endDate {}').format(dataSourceId, indicatorId,
                                                  communityId, startDate,
                                                  endDate) )
    # Compute the indicator score
    response = getHealthScoreResponse()
    score = getIndicatorScore( indicatorId, dataSourceId, communityId,
                               startDate, endDate )
    _score = response.new_score()
    _indicator = _score.new_indicator()
    _indicator.set_element_id( indicatorId )
    _indicator.set_element_title( INDICATORS[indicatorId]['title'] )
    _score.set_element_indicator( _indicator )
    _score.set_element_value( score )
    response.set_element_score( _score )
    # Return response
    c.log( 'Finished' )
    return request, response
def build_msgmap( messages, field ):
    """Build a mapped version of the messages."""
    c.log( 'Starting' )
    mapped = collections.defaultdict(list)
    # Any messages?
    if not messages:
        c.log( 'No messages given -- nothing to do.' )
        return mapped
    # Build the mapped version, indexed by 'field'
    for message in messages:
        mapped[message[field]] += [ message ]
    c.log( '{0} mappings done.'.format(len(mapped)) )
    return mapped
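What build_msgmap() produces, on tiny invented tuples where index 1 holds the user id:

# Hypothetical messages as (id, user_id) pairs, mapped by field index 1.
msgs = [(1, 'alice'), (2, 'bob'), (3, 'alice')]
by_user = build_msgmap( msgs, 1 )
# -> {'alice': [(1, 'alice'), (3, 'alice')], 'bob': [(2, 'bob')]}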
def compute( dsconf, community, start, end ):
    """Compute community activity."""
    c.log( 'Starting' )
    activities = collections.defaultdict(float)
    # Get all messages and replies for the community
    messages = ds.get_messages(dsconf, start, end, community)
    messages = common.build_msgmap(messages, ds.field('messages','user_id'))
    # Get users activities
    c.log( 'Computing users activity ...' )
    for (user,user_messages) in messages.iteritems():
        activities[user] = len( user_messages )
    # Return users activities
    c.log( 'Finished' )
    return activities
def compute( dsconf, community, start, end ):
    """Compute community popularities."""
    c.log( 'Starting' )
    popularities = collections.defaultdict(float)
    # Get all messages and replies for the community
    messages = ds.get_messages(dsconf, start, end, community)
    messages = common.build_msgmap(messages, ds.field('messages','user_id'))
    replies = common.build_replies(messages, ds.fields('messages'))
    # Get requested users popularities
    c.log( 'Computing users popularities ...' )
    for (user,user_messages) in messages.iteritems():
        popularities[user] = len( replies[user] ) * 1.0 / \
                             len( user_messages )
    # Return users popularities
    c.log( 'Finished' )
    return popularities
def compute_feature( parms ):
    """Do the actual feature computation."""
    c.log( 'Starting' )
    # Try to load the requested feature
    try:
        feature = imp_module( 'features.'+parms['feature_name'] )
    except ImportError:
        c.log( "The feature '{0}' could not be loaded." \
                   .format(parms['feature_name']) )
        return False
    # Try to load the requested aggregation
    try:
        aggregation = imp_module( 'aggregations.' +
                                  parms['aggregation_name'] )
    except ImportError:
        c.log( "The aggregation '{0}' could not be loaded." \
                   .format(parms['aggregation_name']) )
        return False
    # Try to load the requested consolidation
    try:
        consolidation = imp_module( 'consolidations.' +
                                    parms['consolidation_name'] )
    except ImportError:
        c.log( "The consolidation '{0}' could not be loaded." \
                   .format(parms['consolidation_name']) )
        return False
    # Build the datasource configuration
    dsconfig = ds.build_conf( parms['data_source'], DATASOURCES_CONF )
    # Get the cache directory
    config = ConfigParser.RawConfigParser()
    config.read(GENERAL_CONF)
    dsconfig['cache_dir'] = config.get('paths','cache_dir')
    # Compute the requested feature
    c.log("Computing '{0}' in {1}, from {2} to {3} ..." \
              .format( parms['feature_name'],
                       ('community id {0}'.format(parms['community_id']),
                        'all communities')[parms['community_id'] is None],
                       parms['start_date'], parms['end_date'] ))
    start_time = t.time()
    values = feature.compute( dsconfig, parms['community_id'],
                              parms['start_date'], parms['end_date'])
    c.log('Time elapsed: {0}s'.format(t.time() - start_time) )
    # Compute aggregation/consolidation
    values = consolidation.compute( aggregation.compute(values) )
    # Output values
    print values
    c.log('Finished')
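A hypothetical 'parms' dictionary for compute_feature(); every name below is invented, and each *_name must correspond to a real module under features/, aggregations/ and consolidations/ for imp_module() to load:

import datetime as dt
# Illustrative parameters only -- substitute real module/datasource names.
parms = {
    'feature_name':       'activity',
    'aggregation_name':   'mean',
    'consolidation_name': 'linear',
    'data_source':        'example_forum',
    'community_id':       42,                  # or None for all communities
    'start_date':         dt.datetime(2012, 1, 1),
    'end_date':           dt.datetime(2012, 2, 1),
}
compute_feature( parms )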