Example 1
class Aggregator:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):
		self.process_media()

	def process_media(self):
		for user in self.client.current_db.users.find():

			#Have media and tag lists on a per-user basis
			self.media = []

			#Grab the facebook media
			print
			print 'Grabbing facebook media...'
			try:
				fb_media = FacebookConnector(user[u'oauth_token'], self.client, user).get_media()
				self.media.append(('facebook', fb_media))
			except facebook.GraphAPIError:
				print 'Error with facebook for user ' + user['name']


			#For each service they like, find the likes
			try:
				for service in user[u'tokens']:

					if (service[u'provider'] == 'vimeo'):
						print
						print 'Grabbing vimeo media...'
						vimeo_media = VimeoConnector(service[u'username']).get_media()
						self.media.append(('vimeo', vimeo_media))
					if (service[u'provider'] == 'twitter'):
						print
						print 'Grabbing twitter media...'
						twitter_media = TwitterConnector(service[u'oauth_token'], service[u'oauth_token_secret']).get_media()
						self.media.append(('twitter', twitter_media))
					if (service[u'provider'] == 'lastfm'):
						print
						print 'Grabbing lastfm media...'
						lastfm_media = LastFMConnector(service[u'username']).get_media()
						self.media.append(('lastfm', lastfm_media))
					if (service[u'provider'] == 'soundcloud'):
						print
						print 'Grabbing soundcloud media...'
						soundcloud_media = SoundcloudConnector(service[u'oauth_token']).get_media()
						self.media.append(('soundcloud', soundcloud_media))
					if (service[u'provider'] == 'google_login'):
						print
						print 'Grabbing youtube media...'
						youtube_media = YoutubeConnector(service[u'oauth_token_secret']).get_media()
						self.media.append(('youtube', youtube_media))

			except KeyError:
				print 'No other service tokens'

			#All processed, insert media and tags together
			self.client.insert_media(user[u'_id'], self.media)
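
The examples in this listing assume a thin MongoConnector wrapper around pymongo. A minimal sketch of the interface used above (hypothetical: the real class is project-specific, and the helper below is inferred from the insert_media call):

from pymongo import MongoClient

class MongoConnector(object):

	def __init__(self, host='localhost', port=27017, db_name='aggregator'):
		# Expose the underlying pymongo database as current_db
		self.connection = MongoClient(host, port)
		self.current_db = self.connection[db_name]

	def insert_media(self, user_id, media):
		# media is a list of (provider, items) tuples, as built in process_media()
		for provider, items in media:
			for item in items:
				item['user'] = user_id
				item['provider'] = provider
				self.current_db.user_media.insert_one(item)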
Example 2
    def __init__(self, bot):
        threading.Thread.__init__(self)
        self.db = MongoConnector()
        self.db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])
        self.collection = cp['DATABASE']['MonitoringCollection']

        self.telegram_bot = bot

        self.running = True

        print('Reward crawler started')
Example 3
class RewardCrawler(threading.Thread):
    def __init__(self, bot):
        threading.Thread.__init__(self)
        self.db = MongoConnector()
        self.db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])
        self.collection = cp['DATABASE']['MonitoringCollection']

        self.telegram_bot = bot

        self.running = True

        print('Reward crawler started')

    def terminate(self):
        self.running = False

    def run(self):
        while self.running:
            success, result = self.db.find(self.collection, {}, many=True)
            if not success:
                continue

            for entry in result:
                new_transactions = blockchain.get_new_transactions(entry['address'], entry['last_transaction'])

                for transaction in reversed(new_transactions):
                    timestamp = int(transaction[2])
                    received = round(float(transaction[1]) - float(transaction[0]), 7)

                    entry['balance'] += received
                    if entry['last_transaction'] < timestamp:
                        entry['last_transaction'] = timestamp

                    message = NEW_TRANSACTION_MESSAGE_TEMPLATE.format(entry['name'],
                                                                      timestamp_to_date(timestamp),
                                                                      float(received))
                    try:
                        self.telegram_bot.send_message(chat_id=entry['telegram_id'], text=message)
                    except Exception as e:
                        print("User blocked bot by id:", entry['telegram_id'])

                # entry['total_transactions'] = blockchain.get_total_transactions(entry['address'])

                db.update(self.collection, {'_id': entry['_id']}, entry)
                time.sleep(0.1)

            global last_checked
            last_checked = datetime.datetime.utcnow()
            time.sleep(CRAWLER_SLEEP_TIME)
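
A hypothetical way to drive the thread above from the bot's entry point (telegram_bot and run_bot_forever are placeholders):

crawler = RewardCrawler(telegram_bot)
crawler.start()          # executes RewardCrawler.run() on a background thread
try:
    run_bot_forever()    # placeholder for the bot's own main loop
finally:
    crawler.terminate()  # sets self.running = False
    crawler.join()       # may block up to CRAWLER_SLEEP_TIME until run() re-checks the flag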
Example 4
class TopAggregator:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):

		self.media = []

		yt = TopYoutube()
		yt_media = yt.discover()
		if yt_media is not None:
			print str(len(yt_media)) + " top youtube items"
			self.drop_top('youtube')
			self.media.append(('youtube', yt_media))

		vi = TopVimeo()
		vi_media = vi.discover()
		if vi_media is not None:
			print str(len(vi_media)) + " top vimeo items"
			self.drop_top('vimeo')
			self.media.append(('vimeo', vi_media))

		sc = TopSoundcloud()
		sc_media = sc.discover()
		if sc_media is not None:
			print str(len(sc_media)) + " top soundcloud items"
			self.drop_top('soundcloud')
			self.media.append(('soundcloud', sc_media))

		sp = TopSpotify()
		sp_media = sp.discover()
		if sp_media is not None:
			print str(len(sp_media)) + " top spotify items"
			self.drop_top('spotify')
			self.media.append(('spotify', sp_media))

		# Runs daily, so drop the old featured media; the items still exist in the main media table
		self.client.insert_unassigned_media(self.media)

	def drop_top(self, provider):
		self.client.current_db.featured_media.remove({'provider' : provider})
Example 5
class TopAggregator:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):
		self.media = []

		yt = TopYoutube()
		yt_media = yt.discover()
		self.media.append(('youtube', yt_media))

		vi = TopVimeo()
		vi_media = vi.discover()
		self.media.append(('vimeo', vi_media))

		sc = TopSoundcloud()
		sc_media = sc.discover()
		self.media.append(('soundcloud', sc_media))

		self.client.insert_unassigned_media(self.media)
Example 6
    def __init__(self):

        # Initialisation of model hyperparameters
        self.num_factors = 40
        self.num_iterations = 30
        self.reg_param = 5.0
        self.alpha = 10.0
        self.client = MongoConnector()

        # Maps user ids to matrix indexes
        self.user_index = {}
        # Maps item ids to matrix indexes
        self.item_index = {}
        # Maps matrix indexes to item ids
        self.index_item = {}
        # Maps matrix indexes to user ids
        self.index_user = {}
        self.errors = []
Example 7
def main():
    tornado.options.options['logging'] = "INFO"
    tornado.options.parse_command_line()

    io_loop = tornado.ioloop.IOLoop.instance()

    app = make_app()

    # MongoConnector is our mongodb connector
    mongo_connector = MongoConnector('test_database',
                                     mongo_address=settings.MONGO_ADDRESS)
    app.mongo_connector = mongo_connector
    # PikaConnector is our rabbitmq consumer
    app.pika_connector = PikaConnector(
        io_loop, mongo_connector, rabbitmq_address=settings.RABBITMQ_ADDRESS)
    app.pika_connector.run()

    try:
        app.listen(8888)
        io_loop.start()

    except KeyboardInterrupt:
        app.pika_connector.stop()
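
The snippet stops short of the entry point; the usual guard (assumed, not shown in the original) would be:

if __name__ == '__main__':
    main()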
Example 8
#! /usr/bin/python
'''
Created on 23/06/2013

@author: raul
'''

import BeautifulSoup as BS
from urllib2 import urlopen
from HTMLParser import HTMLParser
from mongo_connector import MongoConnector
import info_valladolid_item_scrapper as item_scrapper

connector = MongoConnector()

def process_rss(rss_url):
    rss = urlopen(rss_url).read()
    soup = BS.BeautifulSoup(rss)
    items = soup.findAll('item')
    jsons = []
    for i, item in enumerate(items):
        print '----------- item', i+1 , '-----------'
        print_rss_item(item)
        item_content = HTMLParser().unescape(item.description.string)
        json = item_scrapper.process_item_content(BS.BeautifulSoup(item_content), HTMLParser().unescape(item.guid.string))
        if json:
            jsons.append(json)
            connector.insert(json)
        #process_item_url(HTMLParser().unescape(item.link.next))
    return jsons
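
A hypothetical invocation of the scraper above (the feed URL is a placeholder):

if __name__ == '__main__':
    jsons = process_rss('http://example.com/feed.rss')
    print len(jsons), 'items scraped and inserted'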
  
Example 9
from mongo_connector import MongoConnector
from vimeo_connector import VimeoConnector
from twitter_connector import TwitterConnector
from facebook_connector import FacebookConnector
from lastfm_connector import LastFMConnector
from soundcloud_connector import SoundcloudConnector

client = MongoConnector()

#For each system user
for user in client.current_collection.find():

	media_list = []

	fb_media = FacebookConnector(user[u'oauth_token'], client).get_media()
	media_list.append(('facebook', fb_media))

	#For each service they like, find the likes
	try:
		for service in user[u'tokens']:

			if (service[u'provider'] == 'vimeo'):
				vimeo_media = VimeoConnector(service[u'username']).get_media()
				media_list.append(('vimeo', vimeo_media))
			if (service[u'provider'] == 'twitter'):
				twitter_media = TwitterConnector(service[u'oauth_token'], service[u'oauth_token_secret']).get_media()
				media_list.append(('twitter', twitter_media))
			if (service[u'provider'] == 'lastfm'):
				lastfm_media = LastFMConnector(service[u'username']).get_media()
				media_list.append(('lastfm', lastfm_media))
			if (service[u'provider'] == 'soundcloud'):
				soundcloud_media = SoundcloudConnector(service[u'oauth_token']).get_media()
				media_list.append(('soundcloud', soundcloud_media))
Example 10
class ImplicitMF():

    def __init__(self):

        # Initialisation of model hyperparameters
        self.num_factors = 40
        self.num_iterations = 30
        self.reg_param = 5.0
        self.alpha = 10.0
        self.client = MongoConnector()

        # Maps user ids to matrix indexes
        self.user_index = {}
        # Maps item ids to matrix indexes
        self.item_index = {}
        # Maps matrix indexes to item ids
        self.index_item = {}
        # Maps matrix indexes to user ids
        self.index_user = {}
        self.errors = []

    def process(self):

        self.users = self.client.current_db.users.find()
        # Matrix factorization only applies to media items in the system, not FB or Twitter
        self.items = self.client.current_db.user_media.find({'$or' : [{'link': {'$regex' : ".*spotify.*"}},
                                      {'link': {'$regex' : ".*soundcloud.*"}},
                                      {'link': {'$regex' : ".*youtube.*"}},
                                      {'link': {'$regex' : ".*vimeo.*"}}] })
        self.num_users = self.users.count()
        self.num_items = self.items.count()
        self.counts = np.zeros((self.num_users, self.num_items))
        self.construct_matrix(self.counts)
        # Scaling by alpha gives the confidence values minus 1 (the r_{ui} terms, i.e. Cu - I),
        # which allows Cu to be notated as (Cu - I) + I later on in the code
        self.counts *= self.alpha
        self.countsCopy = self.counts.copy()
        self.counts= sparse.csr_matrix(self.counts)
        self.train_model()
        predictions = self.predict(self.user_vectors,self.item_vectors)
        self.print_prediction(predictions)
        self.store_predictions(predictions)

    def construct_matrix(self,counts):

        # Give all users an index for matrix
        for i,user in enumerate(self.users):
    
            self.user_index[user['_id']] = i
            self.index_user[i] = user['_id']

        # Give all items an index for matrix
        for i,item in enumerate(self.items):

            self.item_index[item['_id']] = i
            self.index_item[i] = item['_id']

            n = self.item_index[item['_id']]

            # For each user rating for this item, find the users index and put a 1 in matrix
            for user in (item['user_ratings']):

                m = self.user_index[user['user']]
                counts[m][n] = 1

    def getItemName(self, objectid):
        item = self.client.current_db.media.find_one({"_id" : objectid})
        return item['name'] + ' - ' + item['link']

    def getUserName(self, userid):
        user = self.client.current_db.users.find_one({"_id" : userid})
        return user['name']

    # Recalculates user factor and item factor vectors for a fixed number of iterations using ALS
    def train_model(self):

        # Initialise to random noise 
        self.user_vectors = np.random.normal(size=(self.num_users,
                                                   self.num_factors))
        self.item_vectors = np.random.normal(size=(self.num_items,
                                                   self.num_factors))


        # For each iteration
        for i in xrange(self.num_iterations):
            t0 = time.time()
            # Fix item vectors and solve for user vector
            print 'Solving for user vectors...'
            self.user_vectors = self.iteration(True, sparse.csr_matrix(self.item_vectors))
            # Fix user vectors and solve for item vectors
            print 'Solving for item vectors...'
            self.item_vectors = self.iteration(False, sparse.csr_matrix(self.user_vectors))
            t1 = time.time()
                        
            print 'iteration %i finished in %f seconds' % (i + 1, t1 - t0)
        
    def iteration(self, user, fixed_vecs):

        # Number of user / item vectors you are solving for 
        num_solve = self.num_users if user else self.num_items
        # Size of fixed matrix
        num_fixed = fixed_vecs.shape[0]
        
        # Precalculate the matrices that don't depend on u

        # Y^t Y calculated
        YTY = fixed_vecs.T.dot(fixed_vecs)

        # Identity matrix
        eye = sparse.eye(num_fixed)
        
        # lambda * I (regularisation term)
        lambda_eye = self.reg_param * sparse.eye(self.num_factors)
        # Initialise a vector to store results of recomputed factor vector
        solve_vecs = np.zeros((num_solve, self.num_factors))

        t = time.time()

        # For each item / user you need to recalculate for
        for i in xrange(num_solve):
            if user:
                # if recomputing user vectors, retrieve their ratings 
                counts_i = self.counts[i].toarray()
            else:
                # else, if recomputing item vectors, get all ratings for item i
                counts_i = self.counts[:, i].T.toarray()
            CuI = sparse.diags(counts_i, [0])
            pu = counts_i.copy()
            # setting preferences from c values.
            pu[np.where(pu != 0)] = 1.0
            # Calculate Y^T (Cu - I) Y and Y^T Cu p(u)
            YTCuIY = fixed_vecs.T.dot(CuI).dot(fixed_vecs)
            YTCupu = fixed_vecs.T.dot(CuI + eye).dot(sparse.csr_matrix(pu).T)
            xu = spsolve(YTY + YTCuIY + lambda_eye, YTCupu)
            solve_vecs[i] = xu

        return solve_vecs

    def predict(self,user_vectors,item_vectors):
        # For each index in reconstructed matrix, calculate using dot product
        predictions = np.zeros((self.num_users,self.num_items))
        for i in range(self.num_users):
            for j in range(self.num_items):
                predictions[i][j] = self.user_vectors[i].T.dot(self.item_vectors[j])
        
        return predictions

    def print_prediction(self,predictions):

        for i in range(0, len(predictions)):

            for j, x in enumerate(predictions[i]):

                # If we know they already like it, set prediction to 0 so it's not suggested
                if (self.countsCopy[i][j] != 0):
                    predictions[i][j] = 0

            # Finds the top 10% of positively-predicted items for user i, resolving ids to item names
            n = (len(np.where(predictions[i] > 0)[0])/10)
            topn = np.argpartition(predictions[i], -n)[-n:]
            topn[:] = topn[::-1]
            items = []
            for k in topn:
                items.append(self.getItemName(self.index_item[k]))

            print self.getUserName(self.index_user[i]) + '\n' + str(items)
            print

    def store_predictions(self,predictions):

        for i in range(0,len(predictions)):

            for j, x in enumerate(predictions[i]):
        
                # If we know they already like it, set prediction to -1 so it's not suggested
                if (self.countsCopy[i][j] == 1):
                    predictions[i][j] = -1

            user = self.client.current_db.users.find_one({'_id' : self.index_user[i]})

            media = []
            n = (len(np.where(predictions[i] > 0)[0])/10)
            topn = np.argpartition(predictions[i], -n)[-n:]
            topn[:] = topn[::-1]

            for index in topn:
                item_id = self.index_item[index]
                media.append({'user' : user['_id'], 'media' : item_id})

            self.client.store_prioritised_media(user, media, 'implicit')
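
The per-row solve in iteration() is the implicit-feedback ALS closed form x_u = (Y^T Y + Y^T (Cu - I) Y + lambda*I)^{-1} Y^T Cu p(u). A dense NumPy sketch of a single user update with made-up sizes (illustrative only; the class above uses the sparse equivalents):

import numpy as np

num_factors, num_items = 3, 5
Y = np.random.normal(size=(num_items, num_factors))   # fixed item vectors
c_u = np.array([0., 10., 0., 10., 0.])                # alpha * r_ui, i.e. Cu - I
p_u = (c_u != 0).astype(float)                        # binary preferences

CuI = np.diag(c_u)                                    # Cu - I as a diagonal matrix
A = Y.T.dot(Y) + Y.T.dot(CuI).dot(Y) + 5.0 * np.eye(num_factors)  # 5.0 = reg_param
b = Y.T.dot(CuI + np.eye(num_items)).dot(p_u)                     # Y^T Cu p(u)
x_u = np.linalg.solve(A, b)                           # recomputed user vector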
Example 11
    import topics2themes.make_topic_models as make_topic_models
    from topics2themes.mongo_connector import MongoConnector
    from topics2themes.theme_sorter import ThemeSorter
    from topics2themes.environment_configuration import *
    from topics2themes.topic_model_constants import *


app = Flask(__name__, template_folder="user_interface")

if RUN_LOCALLY:
    CORS(app)
else:
    app.config['MONGO_CONNECT'] = False

try:
    mongo_con = MongoConnector()
except:
    e = sys.exc_info()
    print("The following error occurred: ")
    print(e)
    print("The pymongo database might not be running")
    exit(1)

theme_sort = ThemeSorter(mongo_con)

# Keep JSON responses compact (no pretty-printed whitespace)
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False

def get_more_exception_info():
    trace_back = traceback.format_exc()
Example 12
from api import Api
from mongo_connector import MongoConnector
import json

api = Api()
db = MongoConnector()

repositories = [{
    'owner': 'pytorch',
    'name': 'pytorch'
}, {
    'owner': 'tensorflow',
    'name': 'tensorflow'
}, {
    'owner': 'scrapy',
    'name': 'scrapy'
}, {
    'owner': 'scikit-learn',
    'name': 'scikit-learn'
}, {
    'owner': 'ranger',
    'name': 'ranger'
}, {
    'owner': 'django',
    'name': 'django'
}, {
    'owner': 'ranger',
    'name': 'ranger'
}]

for repo in repositories:
Example 13
import time
import logging
import datetime
import re

# Imports assumed for the names used below (module paths are project-specific guesses)
from configparser import ConfigParser
from mongo_connector import MongoConnector
from blockchain_connector import BlockchainConnector

# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

logger = logging.getLogger(__name__)

cp = ConfigParser()
cp.optionxform = str
cp.read('../config.ini')

db = MongoConnector()
db.connect(cp['DATABASE']['Address'], cp['DATABASE']['Name'])

blockchain = BlockchainConnector()
blockchain.connect(cp['POSTGRES'])

monitoring_collection = cp['DATABASE']['MonitoringCollection']

CRAWLER_SLEEP_TIME = 120

NEW_TRANSACTION_MESSAGE_TEMPLATE = 'New transaction for "{}" ({}): {} XSN'
telegram_bot_token = cp['TELEGRAM']['SecretKey']

DATE_FORMAT = '%d/%m/%Y %H:%M:%S'
ADD_ADDRESS_MESSAGE = 'Enter address for '
ADD_NAME_MESSAGE = 'Enter monitor name'
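
The lookups above imply a config.ini shaped roughly like this (all values are placeholders; the POSTGRES section's keys depend on BlockchainConnector and are omitted):

[DATABASE]
Address = localhost:27017
Name = reward_db
MonitoringCollection = monitoring

[TELEGRAM]
SecretKey = 123456:bot-token-placeholder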
Example 14
            classifications = clf.predict_proba(transformed)
            #print("classifications", classifications)

            sorted_prob_themes = sorted(
                [(prob, theme_nr)
                 for prob, theme_nr in zip(classifications[0], classes)],
                reverse=True)
            sorted_themes = [
                int(theme_nr) for (prob, theme_nr) in sorted_prob_themes
            ]
            print("Used logistic regression classification")

        sorted_themes_using_topic_connection = self.rank_according_to_topic_connection(
            document_id, sorted_themes, potential_theme_dict)

        for theme in all_theme_nrs:  # themes that have no associated documents or description, and therefore aren't classifier-ranked, are ranked last
            if theme not in sorted_themes_using_topic_connection:
                sorted_themes_using_topic_connection.append(theme)
        themes_str = [
            str(theme) for theme in sorted_themes_using_topic_connection
        ]
        return themes_str


if __name__ == '__main__':
    mc = MongoConnector()
    ts = ThemeSorter(mc)
    ts.retrain_model("61ea6c0301c7c1346b1ff9f4")
    print(ts.rank_themes_for_document("61ea6c0301c7c1346b1ff9f4", "14"))
Example 15
class Stager:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):
		relation_collection = self.client.current_db.relations
		user_collection = self.client.current_db.users
		user_graphs_collection = self.client.current_db.user_graphs

		for relation in relation_collection.find():
			self.stage_user(relation)

		self.prioritise_media()

	def stage_user(self, relation):
		ranks = self.rank_order(relation)
		self.client.store_rankings(relation, ranks)

	def rank_order(self, relation):
		#Create a list of all users which are at all related to this user
		ranks = [{'user' : x} for x in set([y['user'] for y in (relation['similar'] + relation['direct'])])]

		for user_links in relation['similar']:
			for rank in ranks:
				if rank['user'] == user_links['user']:
					rank['similar'] = len(user_links['links'])
					continue

		for user_links in relation['direct']:
			for rank in ranks:
				if rank['user'] == user_links['user']:
					rank['direct'] = len(user_links['links'])
					continue

		for rank in ranks:
			if 'similar' in rank.keys():
				if 'direct' in rank.keys():
					rank['total'] = int(rank['similar']) + (int(rank['direct'])*10)
				else:
					rank['total'] = int(rank['similar'])
			elif 'direct' in rank.keys():
				rank['total'] = int(rank['direct'])*10
			else:
				rank['total'] = 0

		return sorted(ranks, key=lambda rank: rank['total'], reverse=True)

	def prioritise_media(self):
		user_media_collection = self.client.current_db.user_media
		user_graphs_collection = self.client.current_db.user_graphs

		for user in user_graphs_collection.find():
			
			media = []
			#For each user here, put a load of media in staged_media table
			if len(user[u'ranks']) > 0:
				#For each media item, store the user and then their stuff with a priority attached
				for user_media_item in user_media_collection.find():
					for rating in user_media_item['user_ratings']:
						for u in user[u'ranks']:
							if rating[u'user'] == u[u'user']:
								#Here we have a media item with a rating by a user with a priority
								media.append({'user' : user[u'_id'], 'media' : user_media_item[u'_id'], 'similarity' : u['total'], 'similar_user' : u['user']})

			if len(media) > 0:
				self.client.store_prioritised_media(user, media)
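
rank_order weights direct links ten times more heavily than similar ones, so with a hypothetical relation document the ordering works out as follows:

relation = {
	'similar': [{'user': 'alice', 'links': range(25)}],
	'direct':  [{'user': 'bob',   'links': range(3)}],
}
# alice: total = 25               (similar links only)
# bob:   total = 3 * 10 = 30      (direct links only)
# so Stager().rank_order(relation) would rank bob above alice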
Example 16
class Aggregator:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):
		self.process_media()

	def process_media(self):
		for user in self.client.current_db.users.find():

			print 'Aggregating ' + user['name'] + '\'s content'

			#Have media and tag lists on a per-user basis
			self.media = []


			#For each service they like, find the likes
			try:
				for service in user[u'tokens']:

					if (service[u'provider'] == 'vimeo'):
						print
						print 'Grabbing vimeo media...'
						vimeo_media = VimeoConnector(service[u'oauth_token']).get_media()
						print str(len(vimeo_media)) + " items returned "
						self.media.append(('vimeo', vimeo_media))

					if (service[u'provider'] == 'twitter'):
						print
						print 'Grabbing twitter media...'
						twitter_media = TwitterConnector(service[u'uid'],service[u'oauth_token'], service[u'oauth_token_secret']).get_media()
						print str(len(twitter_media)) + " items returned "
						self.media.append(('twitter', twitter_media))

					if (service[u'provider'] == 'soundcloud'):
						print
						print 'Grabbing soundcloud media...'
						soundcloud_media = SoundcloudConnector(service[u'oauth_token']).get_media()
						print str(len(soundcloud_media)) + " items returned "
						self.media.append(('soundcloud', soundcloud_media))

					if (service[u'provider'] == 'google_oauth2'):
						print
						print 'Grabbing youtube media...'
						youtube_media = YoutubeConnector(service[u'oauth_token_secret']).get_media()
						print str(len(youtube_media)) + " items returned "
						self.media.append(('youtube', youtube_media))

					if (service[u'provider'] == 'spotify'):
						print
						print 'Grabbing spotify media...'
						token = SpotifyRefresher(service,user['_id']).check_token()
						spotify_media = SpotifyConnector(token).get_media()
						print str(len(spotify_media)) + " items returned "
						self.media.append(('spotify', spotify_media))

			except KeyError, e:
				print e
				print 'No other service tokens'

			#All processed, insert media and tags together
			self.client.insert_media(user[u'_id'], self.media)
Example 17
    def get(self):
        global mongoConnector

        word_document = mongoConnector.getDocumentByWord(
            request.args.get("word")[1:-1])
        return word_document.guessNextWord()


class Ping(Resource):
    def get(self):

        return "Alive"


if __name__ == "__main__":

    if len(sys.argv) != 5:
        print("Usage: problem3.py <host> <port> <database> <collection>")
        sys.exit(-1)

    mongoConnector = MongoConnector(sys.argv[1], int(sys.argv[2]), sys.argv[3],
                                    sys.argv[4])

    app = Flask(__name__)
    api = Api(app)

    api.add_resource(Ping, '/')
    api.add_resource(Next, '/gutenberg/predict/next/')
    api.add_resource(Guess, '/gutenberg/predict/random/')

    app.run(debug=True)
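
Once the service is running, a hypothetical client call (note that the Next handler strips the first and last character of the word argument, so the value is sent quoted):

import requests

resp = requests.get('http://localhost:5000/gutenberg/predict/next/',
                    params={'word': '"hello"'})
print(resp.json())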
Example 18
	def __init__(self):
		self.client = MongoConnector()
Example 19
class Relator:

	def __init__(self):
		self.client = MongoConnector()

	def process(self):
		self.process_relationships()

	def process_relationships(self):
		tag_collection = self.client.current_db.tags.find()
		basic_tags = []
		direct_tags = []

		for tag in tag_collection:
			if 'associations' in tag.keys():
				#See if this term appears alongside others
				for assocs in tag[u'associations']:
					#Create a list of all basic tags and their strongly linked phrases
					basic_tags.append((tag[u'phrase'], (assocs[u'phrase'], assocs['count']), tag[u'users']))
			else:
				#See if people like identical titles
				users = [(user[u'user'], user[u'count']) for user in tag[u'users']]
				direct_tags.append((tag[u'phrase'], users))

		similar_tags = self.create_similar_preferences([tag for tag in basic_tags if tag[1][1] > THRESHOLD])
		direct_tags = [tag for tag in direct_tags if len(tag[1]) > 1]

		self.process_relations(similar_tags, direct_tags)

	def process_relations(self, similar_tags, direct_tags):
		relation_collection = self.client.current_db.relations

		for current_user in self.client.current_db.users.find():

			print 'Printing relationships for ' + current_user[u'name'] + ':'
			
			relation_item = relation_collection.find_one({'user': ObjectId(current_user[u'_id']) })

			if relation_item is None:
				relation = {u'user' : current_user[u'_id']}
			else:
				relation = relation_item

			relation[u'direct'] = self.get_direct_links(current_user, direct_tags)
			relation[u'similar'] = self.get_similar_links(current_user, similar_tags)

			self.client.store_relation(relation)

	def get_direct_links(self, current_user, direct_tags):
		direct = []

		#For every other user, append to the direct list all the phrases it shares in common (if any)
		for user in [user for user in self.client.current_db.users.find() if user['_id'] != current_user[u'_id']]:
			tags = []

			#For every full tag
			for direct_tag in direct_tags:
				#If the current user likes this phrase
				if self.direct_tag_contains_this_user(current_user, direct_tag) and self.direct_tag_contains_this_user(user, direct_tag):
					#Add a link to this phrase alongside this user
					tags.append(direct_tag[0])
			if len(tags) > 0:
				direct.append({'user' : user['_id'], 'links' : tags})

		return direct

	def create_similar_preferences(self, similar_tags):

		sets = []
		tag_collection = self.client.current_db.tags

		for current_user in self.client.current_db.users.find():
			linked_sets = set([])

			#If user likes strongly-linked tags x and y, and another user also likes them, they're similar
			for tag in similar_tags:
				if self.similar_tag_contains_this_user(current_user, tag):

					#Here we have a tag, and its strongly linked neighbour
					#Need to see whether the user also likes the strongly linked phrase
					secondary_tag = tag_collection.find_one({'phrase' : tag[1][0]})
					if current_user[u'_id'] in [ids[u'user'] for ids in secondary_tag[u'users']]:
						linked_sets.add(frozenset([tag[0], secondary_tag['phrase']]))

			sets.append({'user' : current_user[u'_id'], 'linked' : linked_sets})

		return sets	

	def get_similar_links(self, current_user, similar_tags):

		similar = []
		#Hacky list comprehension way of getting the linked information for the current user
		current_user_set = [similar_tag['linked'] for similar_tag in similar_tags if similar_tag['user'] == current_user[u'_id']][0]

		for user in [similar_tag for similar_tag in similar_tags if similar_tag['user'] != current_user[u'_id']]:
			intersection = user['linked'].intersection(current_user_set)
			if len(intersection) > 0:
				print current_user[u'name'] + ' has something in common with ' + self.client.current_db.users.find_one({'_id' : user['user']})['name'] + '!'
				print 'They are both interested in:'
				print [list(item) for item in list(intersection)]
				similar.append({'user' : user['user'], 'links' : [list(item) for item in list(intersection)]})

		return similar