class Aggregator:
    """Pulls each user's liked media from every connected service and
    stores the combined result via the mongo connector.

    NOTE: Python 2 code (print statements).
    """

    def __init__(self):
        # Single mongo handle used both to read users and to write media.
        self.client = MongoConnector()

    def process(self):
        """Entry point: aggregate media for every user in the database."""
        self.process_media()

    def process_media(self):
        """For each user, fetch media from facebook plus every service in
        their token list, then insert the whole batch at once."""
        for user in self.client.current_db.users.find():
            #Have media and tag lists on a per-user basis
            self.media = []
            #Grab the facebook media
            print
            print 'Grabbing facebook media...'
            try:
                fb_media = FacebookConnector(user[u'oauth_token'], self.client, user).get_media()
                self.media.append(('facebook', fb_media))
            except facebook.GraphAPIError:
                # Facebook failure is non-fatal: carry on with other services.
                print 'Error with facebook for user ' + user['name']
            #For each service they like, find the likes
            try:
                for service in user[u'tokens']:
                    # Each provider needs different credentials from the token record.
                    if (service[u'provider'] == 'vimeo'):
                        print
                        print 'Grabbing vimeo media...'
                        vimeo_media = VimeoConnector(service[u'username']).get_media()
                        self.media.append(('vimeo', vimeo_media))
                    if (service[u'provider'] == 'twitter'):
                        print
                        print 'Grabbing twitter media...'
                        twitter_media = TwitterConnector(service[u'oauth_token'], service[u'oauth_token_secret']).get_media()
                        self.media.append(('twitter', twitter_media))
                    if (service[u'provider'] == 'lastfm'):
                        print
                        print 'Grabbing lastfm media...'
                        lastfm_media = LastFMConnector(service[u'username']).get_media()
                        self.media.append(('lastfm', lastfm_media))
                    if (service[u'provider'] == 'soundcloud'):
                        print
                        print 'Grabbing soundcloud media...'
                        soundcloud_media = SoundcloudConnector(service[u'oauth_token']).get_media()
                        self.media.append(('soundcloud', soundcloud_media))
                    if (service[u'provider'] == 'google_login'):
                        print
                        print 'Grabbing youtube media...'
                        # NOTE(review): youtube uses oauth_token_secret, unlike the
                        # others — presumably where the refresh token lives; confirm.
                        youtube_media = YoutubeConnector(service[u'oauth_token_secret']).get_media()
                        self.media.append(('youtube', youtube_media))
            except KeyError:
                # User record has no u'tokens' key: nothing more to fetch.
                print 'No other service tokens'
            #All processed, insert media and tags together
            self.client.insert_media(user[u'_id'], self.media)
def __init__(self, bot):
    """Worker-thread constructor: connect to mongo and keep the bot handle.

    bot -- telegram bot instance used later to push notifications
    """
    threading.Thread.__init__(self)
    self.db = MongoConnector()
    # Connection details come from the [DATABASE] section of the config.
    self.db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])
    self.collection = cp['DATABASE']['MonitoringCollection']
    self.telegram_bot = bot
    # Loop flag polled by the thread body; cleared externally to stop it.
    self.running = True
    print('Reward crawler started')
class RewardCrawler(threading.Thread):
    """Background thread that polls the blockchain for new transactions on
    monitored addresses and notifies each owner via Telegram.
    """

    def __init__(self, bot):
        """bot -- telegram bot instance used to push notifications."""
        threading.Thread.__init__(self)
        self.db = MongoConnector()
        # Connection details come from the [DATABASE] section of the config.
        self.db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])
        self.collection = cp['DATABASE']['MonitoringCollection']
        self.telegram_bot = bot
        # Polled by run(); terminate() clears it to stop the loop.
        self.running = True
        print('Reward crawler started')

    def terminate(self):
        """Ask the crawler loop to exit after the current pass."""
        self.running = False

    def run(self):
        global last_checked
        while self.running:
            success, result = self.db.find(self.collection, {}, many=True)
            if not success:
                # FIX: back off before retrying — the original 'continue'
                # skipped the sleep at the bottom and busy-spun on DB errors.
                time.sleep(CRAWLER_SLEEP_TIME)
                continue
            for entry in result:
                new_transactions = blockchain.get_new_transactions(entry['address'], entry['last_transaction'])
                # Oldest first, so 'last_transaction' advances monotonically.
                for transaction in reversed(new_transactions):
                    timestamp = int(transaction[2])
                    # Net amount received, rounded to 7 decimal places.
                    received = round(float(transaction[1]) - float(transaction[0]), 7)
                    entry['balance'] += received
                    if entry['last_transaction'] < timestamp:
                        entry['last_transaction'] = timestamp
                    message = NEW_TRANSACTION_MESSAGE_TEMPLATE.format(entry['name'], timestamp_to_date(timestamp), float(received))
                    try:
                        self.telegram_bot.send_message(chat_id=entry['telegram_id'], text=message)
                    except Exception:
                        # Most commonly the user blocked the bot; drop the message.
                        print("User blocked bot by id:", entry['telegram_id'])
                # entry['total_transactions'] = blockchain.get_total_transactions(entry['address'])
                # FIX: was 'db.update(...)' (module-level global); use the
                # connection this thread owns, consistent with self.db.find above.
                self.db.update(self.collection, {'_id': entry['_id']}, entry)
                # Small pause between entries to avoid hammering the DB/API.
                time.sleep(0.1)
            last_checked = datetime.datetime.utcnow()
            time.sleep(CRAWLER_SLEEP_TIME)
class TopAggregator: def __init__(self): self.client = MongoConnector() def process(self): self.media = [] yt = TopYoutube() yt_media = yt.discover() print str(len(yt_media)) + " top youtube items" if yt_media is not None: self.drop_top('youtube') self.media.append(('youtube', yt_media)) vi = TopVimeo() vi_media = vi.discover() print str(len(vi_media)) + " top vimeo items" if vi_media is not None: self.drop_top('vimeo') self.media.append(('vimeo', vi_media)) sc = TopSoundcloud() sc_media = sc.discover() print str(len(sc_media)) + " top soundcloud items" if sc_media is not None: self.drop_top('soundcloud') self.media.append(('soundcloud', sc_media)) sp = TopSpotify() sp_media = sp.discover() print str(len(sp_media)) + " top spotify items" if sp_media is not None: self.drop_top('spotify') self.media.append(('spotify', sp_media)) # Updates daily so drop old featured media, still exists in main media table self.client.insert_unassigned_media(self.media) def drop_top(self, provider): self.client.current_db.featured_media.remove({'provider' : provider})
class TopAggregator:
    """Gathers top media from YouTube, Vimeo and SoundCloud into one batch
    and hands it to the store as unassigned media."""

    def __init__(self):
        # Mongo handle used when persisting the collected batch.
        self.client = MongoConnector()

    def process(self):
        """Run each discovery source in turn and persist the combined list."""
        self.media = []
        # Each source is constructed and queried in sequence, exactly as the
        # per-provider statements would run.
        for provider, factory in (('youtube', TopYoutube),
                                  ('vimeo', TopVimeo),
                                  ('soundcloud', TopSoundcloud)):
            self.media.append((provider, factory().discover()))
        self.client.insert_unassigned_media(self.media)
def __init__(self):
    """Set up matrix-factorisation hyper-parameters and id<->index maps."""
    # ALS hyper-parameters for implicit-feedback matrix factorisation.
    self.num_factors = 40      # dimensionality of the latent factor vectors
    self.num_iterations = 30   # number of ALS sweeps
    self.reg_param = 5.0       # L2 regularisation strength (lambda)
    self.alpha = 10.0          # confidence scaling applied to observed interactions
    self.client = MongoConnector()
    # Maps user ids to matrix indexes
    self.user_index = {}
    # Maps item ids to matrix indexes
    self.item_index = {}
    # Maps matrix indexes to item ids
    self.index_item = {}
    # Maps matrix indexes to user ids
    self.index_user = {}
    # Error accumulator.
    self.errors = []
def main():
    """Configure tornado, wire up the mongo and rabbitmq connectors, and
    serve on port 8888 until interrupted."""
    tornado.options.options['logging'] = "INFO"
    tornado.options.parse_command_line()
    io_loop = tornado.ioloop.IOLoop.instance()
    app = make_app()
    # MongoConnector is our mongodb connector
    mongo_connector = MongoConnector('test_database', mongo_address=settings.MONGO_ADDRESS)
    app.mongo_connector = mongo_connector
    # PikaConnector is our rabbitmq consumer
    app.pika_connector = PikaConnector(
        io_loop, mongo_connector, rabbitmq_address=settings.RABBITMQ_ADDRESS)
    app.pika_connector.run()
    try:
        app.listen(8888)
        io_loop.start()
    except KeyboardInterrupt:
        # Ctrl-C: shut the rabbitmq consumer down cleanly before exiting.
        app.pika_connector.stop()
#! /usr/bin/python ''' Created on 23/06/2013 @author: raul ''' import BeautifulSoup as BS from urllib2 import urlopen from HTMLParser import HTMLParser from mongo_connector import MongoConnector import info_valladolid_item_scrapper as item_scrapper connector = MongoConnector() def process_rss(rss_url): rss = urlopen(rss_url).read() soup = BS.BeautifulSoup(rss) items = soup.findAll('item') jsons = [] for i, item in enumerate(items): print '----------- item', i+1 , '-----------' print_rss_item(item) item_content = HTMLParser().unescape(item.description.string) json = item_scrapper.process_item_content(BS.BeautifulSoup(item_content), HTMLParser().unescape(item.guid.string)) if json: jsons.append(json) connector.insert(json) #process_item_url(HTMLParser().unescape(item.link.next)) return jsons
from mongo_connector import MongoConnector from vimeo_connector import VimeoConnector from twitter_connector import TwitterConnector from facebook_connector import FacebookConnector from lastfm_connector import LastFMConnector from soundcloud_connector import SoundcloudConnector client = MongoConnector() #For each system user for user in client.current_collection.find(): media_list = [] fb_media = FacebookConnector(user[u'oauth_token'], client).get_media() media_list.append(('facebook', fb_media)) #For each service they like, find the likes try: for service in user[u'tokens']: if (service[u'provider'] == 'vimeo'): vimeo_media = VimeoConnector(service[u'username']).get_media() media_list.append(('vimeo', vimeo_media)) if (service[u'provider'] == 'twitter'): twitter_media = TwitterConnector(service[u'oauth_token'], service[u'oauth_token_secret']).get_media() media_list.append(('twitter', twitter_media)) if (service[u'provider'] == 'lastfm'): lastfm_media = LastFMConnector(service[u'username']).get_media() media_list.append(('lastfm', lastfm_media)) if (service[u'provider'] == 'soundcloud'):
class ImplicitMF(): def __init__(self): # Initialisation of self.num_factors = 40 self.num_iterations = 30 self.reg_param = 5.0 self.alpha = 10.0 self.client = MongoConnector() # Maps user ids to matrix indexes self.user_index = {} # Maps item ids to matrix indexes self.item_index = {} # Maps matrix indexes to item ids self.index_item = {} # Maps matrix indexes to user ids self.index_user = {} self.errors = [] def process(self): self.users = self.client.current_db.users.find() # Matrix factorization only applies to media items in the system, not FB or Twitter self.items = self.client.current_db.user_media.find({'$or' : [{'link': {'$regex' : ".*spotify.*"}}, {'link': {'$regex' : ".*soundcloud.*"}}, {'link': {'$regex' : ".*youtube.*"}}, {'link': {'$regex' : ".*vimeo.*"}}] }) self.num_users = self.users.count() self.num_items = self.items.count() self.counts = np.zeros((self.num_users, self.num_items)) self.construct_matrix(self.counts) # Essentially defines the confidence values -1, aka the r_{ui} values # allows Cu to be notated as Cu + I later on in the code self.counts *= self.alpha self.countsCopy = self.counts.copy() self.counts= sparse.csr_matrix(self.counts) self.train_model() predictions = self.predict(self.user_vectors,self.item_vectors) self.print_prediction(predictions) self.store_predictions(predictions) def predict(self,user_vectors,item_vectors): # For each index in reconstructed matrix, calculate using dot product predictions = np.zeros((self.num_users,self.num_items)) for i in range(self.num_users): for j in range(self.num_items): predictions[i][j] = self.user_vectors[i].T.dot(mf.item_vectors[j]) return predictions def construct_matrix(self,counts): # Give all users an index for matrix for i,user in enumerate(self.users): self.user_index[user['_id']] = i self.index_user[i] = user['_id'] # Give all items an index for matrix for i,item in enumerate(self.items): self.item_index[item['_id']] = i self.index_item[i] = item['_id'] n = 
self.item_index[item['_id']] # For each user rating for this item, find the users index and put a 1 in matrix for user in (item['user_ratings']): m = self.user_index[user['user']] counts[m][n] = 1 def getItemName(self, objectid): item = self.client.current_db.media.find_one({"_id" : objectid}) return item['name'] + ' - ' + item['link'] def getUserName(self, userid): user = self.client.current_db.users.find_one({"_id" : userid}) return user['name'] # Recalculates user factor and item factor vectors for a fixed number of iterations using ALS def train_model(self): # Initialise to random noise self.user_vectors = np.random.normal(size=(self.num_users, self.num_factors)) self.item_vectors = np.random.normal(size=(self.num_items, self.num_factors)) # For each iteration for i in xrange(self.num_iterations): t0 = time.time() # Fix item vectors and solve for user vector print 'Solving for user vectors...' self.user_vectors = self.iteration(True, sparse.csr_matrix(self.item_vectors)) # Fix user vectors and solve for item vectors print 'Solving for item vectors...' 
self.item_vectors = self.iteration(False, sparse.csr_matrix(self.user_vectors)) t1 = time.time() print 'iteration %i finished in %f seconds' % (i + 1, t1 - t0) def iteration(self, user, fixed_vecs): # Number of user / item vectors you are solving for num_solve = self.num_users if user else self.num_items # Size of fixed matrix num_fixed = fixed_vecs.shape[0] # Precalculate matrices they dont depend on u # Y^t Y calculated YTY = fixed_vecs.T.dot(fixed_vecs) # Identity matrix eye = sparse.eye(num_fixed) # LambaI lambda_eye = self.reg_param * sparse.eye(self.num_factors) # Initialise a vector to store results of recomputed factor vector solve_vecs = np.zeros((num_solve, self.num_factors)) t = time.time() # For each item / user you need to recalcualte for for i in xrange(num_solve): if user: # if recomputing user vectors, retrieve their ratings counts_i = self.counts[i].toarray() else: # else, if recomputing item vectors, get all ratings for item i counts_i = self.counts[:, i].T.toarray() CuI = sparse.diags(counts_i, [0]) pu = counts_i.copy() # setting preferences from c values. pu[np.where(pu != 0)] = 1.0 # Calculate Ttrans YTCuIY = fixed_vecs.T.dot(CuI).dot(fixed_vecs) YTCupu = fixed_vecs.T.dot(CuI + eye).dot(sparse.csr_matrix(pu).T) xu = spsolve(YTY + YTCuIY + lambda_eye, YTCupu) solve_vecs[i] = xu return solve_vecs def predict(self,user_vectors,item_vectors): # For each index in reconstructed matrix, calculate using dot product predictions = np.zeros((self.num_users,self.num_items)) for i in range(self.num_users): for j in range(self.num_items): predictions[i][j] = self.user_vectors[i].T.dot(self.item_vectors[j]) return predictions def print_prediction(self,predictions): for i in range (0,len(predictions)): for j, x in enumerate(predictions[i]): # If we know they already like it, set prediction to 0 so it's not suggested(?) 
if (self.countsCopy[i][j] != 0): predictions[i][j] = 0 # Finds top 10 items for user i, finds item names instead of id's n = (len(np.where(predictions[i] > 0)[0])/10) topn = np.argpartition(predictions[i], -n)[-n:] topn[:] = topn[::-1] items = [] for k in topn: items.append(self.getItemName(self.index_item[k])) print self.getUserName(self.index_user[i]) + '\n' + str(items) print def store_predictions(self,predictions): for i in range(0,len(predictions)): for j, x in enumerate(predictions[i]): # If we know they already like it, set prediction to 0 so it's not suggested(?) if (self.countsCopy[i][j] == 1): predictions[i][j] = -1 user = self.client.current_db.users.find_one({'_id' : self.index_user[i]}) media = [] n = (len(np.where(predictions[i] > 0)[0])/10) topn = np.argpartition(predictions[i], -n)[-n:] topn[:] = topn[::-1] for index in topn: item_id = self.index_item[index] media.append({'user' : user['_id'], 'media' : item_id}) self.client.store_prioritised_media(user, media, 'implicit')
import topics2themes.make_topic_models as make_topic_models
from topics2themes.mongo_connector import MongoConnector
from topics2themes.theme_sorter import ThemeSorter
from topics2themes.environment_configuration import *
from topics2themes.topic_model_constants import *

# Serve the UI templates straight from the user_interface folder.
app = Flask(__name__, template_folder="user_interface")

if RUN_LOCALLY:
    CORS(app)
else:
    app.config['MONGO_CONNECT'] = False

try:
    mongo_con = MongoConnector()
except Exception:
    # FIX: was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    e = sys.exc_info()
    print("The following error occurred: ")
    print(e)
    print("The pymongo database might not be running")
    # NOTE(review): re-instantiating here will raise again if the database is
    # still down, so exit(1) may never run — confirm this retry is intended.
    mongo_con = MongoConnector()
    exit(1)

theme_sort = ThemeSorter(mongo_con)

# To not have a lot of space in the output
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False


def get_more_exception_info():
    trace_back = traceback.format_exc()
from api import Api from mongo_connector import MongoConnector import json api = Api() db = MongoConnector() repositories = [{ 'owner': 'pytorch', 'name': 'pytorch' }, { 'owner': 'tensorflow', 'name': 'tensorflow' }, { 'owner': 'scrapy', 'name': 'scrapy' }, { 'owner': 'scikit-learn', 'name': 'scikit-learn' }, { 'owner': 'ranger', 'name': 'ranger' }, { 'owner': 'django', 'name': 'django' }, { 'owner': 'ranger', 'name': 'ranger' }] for repo in repositories:
import time
import logging
import datetime
import re

# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)

logger = logging.getLogger(__name__)

# Read the shared config; optionxform=str keeps option names case-sensitive.
cp = ConfigParser()
cp.optionxform = str
cp.read('../config.ini')

# Module-wide mongo connection, configured from the [DATABASE] section.
db = MongoConnector()
db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])

# Blockchain access goes through postgres credentials from [POSTGRES].
blockchain = BlockchainConnector()
blockchain.connect(cp['POSTGRES'])

monitoring_collection = cp['DATABASE']['MonitoringCollection']

# Seconds between crawler passes.
CRAWLER_SLEEP_TIME = 120
NEW_TRANSACTION_MESSAGE_TEMPLATE = 'New transaction for "{}" ({}): {} XSN'
telegram_bot_token = cp['TELEGRAM']['SecretKey']
DATE_FORMAT = '%d/%m/%Y %H:%M:%S'
# Bot prompts shown while registering a new monitored address.
ADD_ADDRESS_MESSAGE = 'Enter address for '
ADD_NAME_MESSAGE = 'Enter monitor name'
classifications = clf.predict_proba(transformed) #print("classifications", classifications) sorted_prob_themes = sorted( [(prob, theme_nr) for prob, theme_nr in zip(classifications[0], classes)], reverse=True) sorted_themes = [ int(theme_nr) for (prob, theme_nr) in sorted_prob_themes ] print("Used logistic regression classification") sorted_themes_using_topic_connection = self.rank_according_to_topic_connection( document_id, sorted_themes, potential_theme_dict) for theme in all_theme_nrs: # themes that have no associate documents or description, and therefore aren't classifier ranked, are ranked as last if theme not in sorted_themes_using_topic_connection: sorted_themes_using_topic_connection.append(theme) themes_str = [ str(theme) for theme in sorted_themes_using_topic_connection ] return themes_str if __name__ == '__main__': mc = MongoConnector() ts = ThemeSorter(mc) ts.retrain_model("61ea6c0301c7c1346b1ff9f4") print(ts.rank_themes_for_document("61ea6c0301c7c1346b1ff9f4", "14"))
class Stager:
    """Turns stored user relations into ranked neighbour lists, then into a
    prioritised media feed per user."""

    def __init__(self):
        self.client = MongoConnector()

    def process(self):
        """Rank neighbours for every relation record, then build feeds."""
        relation_collection = self.client.current_db.relations
        user_collection = self.client.current_db.users
        user_graphs_collection = self.client.current_db.user_graphs
        for relation in relation_collection.find():
            self.stage_user(relation)
        self.prioritise_media()

    def stage_user(self, relation):
        """Compute and persist the ranked neighbour list for one relation."""
        ranks = self.rank_order(relation)
        self.client.store_rankings(relation, ranks)

    def rank_order(self, relation):
        """Score every related user (direct links weigh 10x similar links)
        and return them sorted by total score, highest first."""
        #Create a list of all users which are at all related to this user
        ranks = [{'user' : x} for x in set([y['user'] for y in (relation['similar'] + relation['direct'])])]
        # Attach the number of 'similar' links each related user shares.
        for user_links in relation['similar']:
            for rank in ranks:
                if rank['user'] == user_links['user']:
                    rank['similar'] = len(user_links['links'])
                    continue
        # Attach the number of 'direct' links each related user shares.
        for user_links in relation['direct']:
            for rank in ranks:
                if rank['user'] == user_links['user']:
                    rank['direct'] = len(user_links['links'])
                    continue
        # Combine into a single score; direct links count 10x.
        for rank in ranks:
            if 'similar' in rank.keys():
                if 'direct' in rank.keys():
                    rank['total'] = int(rank['similar']) + (int(rank['direct'])*10)
                else:
                    rank['total'] = int(rank['similar'])
            elif 'direct' in rank.keys():
                rank['total'] = int(rank['direct'])*10
            else:
                rank['total'] = 0
        return sorted(ranks, key=lambda rank: rank['total'], reverse=True)

    def prioritise_media(self):
        """Build each user's staged media list from their ranked neighbours'
        ratings, carrying the neighbour's similarity score as priority."""
        user_media_collection = self.client.current_db.user_media
        user_graphs_collection = self.client.current_db.user_graphs
        for user in user_graphs_collection.find():
            media = []
            #For each user here, put a load of media in staged_media table
            if len(user[u'ranks']) > 0:
                #For each media item, store the user and then their stuff with a priority attached
                for user_media_item in user_media_collection.find():
                    for rating in user_media_item['user_ratings']:
                        for u in user[u'ranks']:
                            if rating[u'user'] == u[u'user']:
                                #Here we have a media item with a rating by a user with a priority
                                media.append({'user' : user[u'_id'], 'media' : user_media_item[u'_id'], 'similarity' : u['total'], 'similar_user' : u['user']})
            if len(media) > 0:
                self.client.store_prioritised_media(user, media)
class Aggregator:
    """Pulls each user's liked media from every connected service (vimeo,
    twitter, soundcloud, youtube, spotify) and stores the combined batch.

    NOTE: Python 2 code (print statements, 'except KeyError,e' syntax).
    """

    def __init__(self):
        # Single mongo handle used both to read users and to write media.
        self.client = MongoConnector()

    def process(self):
        """Entry point: aggregate media for every user in the database."""
        self.process_media()

    def process_media(self):
        """For each user, fetch media from every service in their token list
        and insert the whole batch at once."""
        for user in self.client.current_db.users.find():
            print 'Aggregating ' + user['name'] + '\'s content'
            #Have media and tag lists on a per-user basis
            self.media = []
            #For each service they like, find the likes
            try:
                for service in user[u'tokens']:
                    # Each provider needs different credentials from the token record.
                    if (service[u'provider'] == 'vimeo'):
                        print
                        print 'Grabbing vimeo media...'
                        vimeo_media = VimeoConnector(service[u'oauth_token']).get_media()
                        print str(len(vimeo_media)) + " items returned "
                        self.media.append(('vimeo', vimeo_media))
                    if (service[u'provider'] == 'twitter'):
                        print
                        print 'Grabbing twitter media...'
                        twitter_media = TwitterConnector(service[u'uid'],service[u'oauth_token'], service[u'oauth_token_secret']).get_media()
                        print str(len(twitter_media)) + " items returned "
                        self.media.append(('twitter', twitter_media))
                    if (service[u'provider'] == 'soundcloud'):
                        print
                        print 'Grabbing soundcloud media...'
                        soundcloud_media = SoundcloudConnector(service[u'oauth_token']).get_media()
                        print str(len(soundcloud_media)) + " items returned "
                        self.media.append(('soundcloud', soundcloud_media))
                    if (service[u'provider'] == 'google_oauth2'):
                        print
                        print 'Grabbing youtube media...'
                        # NOTE(review): youtube uses oauth_token_secret, unlike the
                        # others — presumably where the refresh token lives; confirm.
                        youtube_media = YoutubeConnector(service[u'oauth_token_secret']).get_media()
                        print str(len(youtube_media)) + " items returned "
                        self.media.append(('youtube', youtube_media))
                    if (service[u'provider'] == 'spotify'):
                        print
                        print 'Grabbing spotify media...'
                        # Spotify tokens expire; refresh (if needed) before fetching.
                        token = SpotifyRefresher(service,user['_id']).check_token()
                        spotify_media = SpotifyConnector(token).get_media()
                        print str(len(spotify_media)) + " items returned "
                        self.media.append(('spotify', spotify_media))
            except KeyError,e:
                # User record has no u'tokens' key (or a token lacks a field).
                print e
                print 'No other service tokens'
            #All processed, insert media and tags together
            self.client.insert_media(user[u'_id'], self.media)
# NOTE(review): this get() appears to be a method of a Flask-RESTful Resource
# whose class header is outside this view (registered below as Next — confirm).
def get(self):
    """Return the predicted next word for the queried word."""
    global mongoConnector
    # [1:-1] strips the surrounding quote characters from the query value.
    word_document = mongoConnector.getDocumentByWord(
        request.args.get("word")[1:-1])
    return word_document.guessNextWord()


class Ping(Resource):
    """Liveness-check endpoint."""
    def get(self):
        return "Alive"


if __name__ == "__main__":
    # Expect exactly: host, port, database, collection.
    if len(sys.argv) != 5:
        print("Usage: problem3.py <host> <port> <database> <collection>")
        sys.exit(-1)
    mongoConnector = MongoConnector(sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4])
    app = Flask(__name__)
    api = Api(app)
    api.add_resource(Ping, '/')
    api.add_resource(Next, '/gutenberg/predict/next/')
    api.add_resource(Guess, '/gutenberg/predict/random/')
    app.run(debug=True)
def __init__(self):
    """Create the mongo connection this object reads and writes through."""
    self.client = MongoConnector()
class Relator: def __init__(self): self.client = MongoConnector() def process(self): self.process_relationships() def process_relationships(self): tag_collection = self.client.current_db.tags.find() basic_tags = [] direct_tags = [] for tag in tag_collection: if 'associations' in tag.keys(): #See if this term appears alongside others for assocs in tag[u'associations']: #Create a list of all basic tags and their strongly linked phrases basic_tags.append((tag[u'phrase'], (assocs[u'phrase'], assocs['count']), tag[u'users'])) else: #See if people like identical titles users = [(user[u'user'], user[u'count']) for user in tag[u'users']] direct_tags.append((tag[u'phrase'], users)) similar_tags = self.create_similar_preferences([tag for tag in basic_tags if tag[1][1] > THRESHOLD]) direct_tags = [tag for tag in direct_tags if len(tag[1]) > 1] self.process_relations(similar_tags, direct_tags) def process_relations(self, similar_tags, direct_tags): relation_collection = self.client.current_db.relations for current_user in self.client.current_db.users.find(): print 'Printing relationships for ' + current_user[u'name'] + ':' relation_item = relation_collection.find_one({'user': ObjectId(current_user[u'_id']) }) if relation_item is None: relation = {u'user' : current_user[u'_id']} else: relation = relation_item relation[u'direct'] = self.get_direct_links(current_user, direct_tags) relation[u'similar'] = self.get_similar_links(current_user, similar_tags) self.client.store_relation(relation) def get_direct_links(self, current_user, direct_tags): direct = [] #For every other user, append to the direct list all the phrases it shares in common (if any) for user in [user for user in self.client.current_db.users.find() if user['_id'] != current_user[u'_id']]: tags = [] #For every full tag for direct_tag in direct_tags: #If the current user likes this phrase if self.direct_tag_contains_this_user(current_user, direct_tag) and self.direct_tag_contains_this_user(user, direct_tag): #Add a 
link to this phrase alongside this user tags.append(direct_tag[0]) if len(tags) > 0: direct.append({'user' : user['_id'], 'links' : tags}) return direct def create_similar_preferences(self, similar_tags): sets = [] tag_collection = self.client.current_db.tags for current_user in self.client.current_db.users.find(): linked_sets = set([]) #If user likes strongly-linked tags x and y, and another user also likes them, they're similar for tag in similar_tags: if self.similar_tag_contains_this_user(current_user, tag): #Here we have a tag, and its strongly linked neighbour #Need to see whether the user also likes the strongly linked phrase secondary_tag = tag_collection.find_one({'phrase' : tag[1][0]}) if current_user[u'_id'] in [ids[u'user'] for ids in secondary_tag[u'users']]: linked_sets.add(frozenset([tag[0], secondary_tag['phrase']])) sets.append({'user' : current_user[u'_id'], 'linked' : linked_sets}) return sets def get_similar_links(self, current_user, similar_tags): similar = [] #Hacky list comprehension way of getting the linked information for the current user current_user_set = [similar_tag['linked'] for similar_tag in similar_tags if similar_tag['user'] == current_user[u'_id']][0] for user in [similar_tag for similar_tag in similar_tags if similar_tag['user'] != current_user[u'_id']]: intersection = user['linked'].intersection(current_user_set) if len(intersection) > 0: print current_user[u'name'] + ' has something in common with ' + self.client.current_db.users.find_one({'_id' : user['user']})['name'] + '!' print 'They are both interested in:' print [list(item) for item in list(intersection)] similar.append({'user' : user['user'], 'links' : [list(item) for item in list(intersection)]})