def movieGather(movie_likes):
    """Resolve the user's movie likes and tally the genres they cover.

    Every non-word character in a like's name is replaced by a space before
    the name is handed to ``text_analysis_freebase.search(..., "movie")``.
    Likes for which the search returns a falsy result are skipped.

    Args:
        movie_likes: iterable of mappings providing ``'name'`` and ``'id'``.

    Returns:
        A ``(movie_genres, full_movie_cat)`` tuple, mirroring
        ``musicGather``: ``movie_genres`` maps each genre to the number of
        resolved likes carrying it, and ``full_movie_cat`` is a list of
        ``{'fb_id', 'like_name', 'genres'}`` dicts, one per resolved like.
    """
    movie_genres = {}
    full_movie_cat = []
    for like in movie_likes:
        like_check = re.sub(r"[\W]", " ", like['name'].strip())
        entity = text_analysis_freebase.search(like_check, "movie")
        if not entity:
            continue

        genres = entity['genres']
        # Prefer 'title' and fall back to 'name' when it is absent. Only the
        # title lookup is guarded, so unrelated errors are no longer hidden.
        # (The original author flagged this fallback with "CHECK THIS".)
        try:
            like_name = entity['title']
        except KeyError:
            like_name = entity['name']

        full_movie_cat.append({
            'fb_id': like['id'],
            'like_name': like_name,
            'genres': genres,
        })
        for gen in genres:
            movie_genres[gen] = movie_genres.get(gen, 0) + 1

    # The original fell off the end and returned None, discarding both results.
    return movie_genres, full_movie_cat
def musicGather(music_likes):
    """Look up each music like and count the genres of the matches.

    Each like's ``'name'`` is passed to
    ``text_analysis_freebase.search(..., "music")``; likes with a falsy
    search result contribute nothing.

    Args:
        music_likes: iterable of mappings providing ``'name'`` and ``'id'``.

    Returns:
        ``(music_genres, full_music_cat)``: a genre -> occurrence-count dict
        and a list of ``{'fb_id', 'like_name', 'genres'}`` dicts, one per
        resolved like.
    """
    genre_counts = {}
    catalogue = []
    for liked in music_likes:
        match = text_analysis_freebase.search(liked['name'], "music")
        if not match:
            continue
        found_genres = match['genres']
        catalogue.append({
            'fb_id': liked['id'],
            'like_name': match['name'],
            'genres': found_genres,
        })
        for genre in found_genres:
            genre_counts[genre] = genre_counts.get(genre, 0) + 1
    return genre_counts, catalogue
def movieGather(movie_likes):
    """Resolve the user's movie likes and tally the genres they cover.

    Every non-word character in a like's name is replaced by a space before
    the name is handed to ``text_analysis_freebase.search(..., "movie")``.
    Likes for which the search returns a falsy result are skipped.

    Args:
        movie_likes: iterable of mappings providing ``'name'`` and ``'id'``.

    Returns:
        A ``(movie_genres, full_movie_cat)`` tuple, mirroring
        ``musicGather``: ``movie_genres`` maps each genre to the number of
        resolved likes carrying it, and ``full_movie_cat`` is a list of
        ``{'fb_id', 'like_name', 'genres'}`` dicts, one per resolved like.
    """
    movie_genres = {}
    full_movie_cat = []
    for like in movie_likes:
        like_check = re.sub(r"[\W]", " ", like['name'].strip())
        entity = text_analysis_freebase.search(like_check, "movie")
        if not entity:
            continue

        genres = entity['genres']
        # Prefer 'title' and fall back to 'name' when it is absent. Only the
        # title lookup is guarded, so unrelated errors are no longer hidden.
        # (The original author flagged this fallback with "CHECK THIS".)
        try:
            like_name = entity['title']
        except KeyError:
            like_name = entity['name']

        full_movie_cat.append({
            'fb_id': like['id'],
            'like_name': like_name,
            'genres': genres,
        })
        for gen in genres:
            movie_genres[gen] = movie_genres.get(gen, 0) + 1

    # The original fell off the end and returned None, discarding both results.
    return movie_genres, full_movie_cat
def musicGather(music_likes):
    """Look up each music like and count the genres of the matches.

    Each like's ``'name'`` is passed to
    ``text_analysis_freebase.search(..., "music")``; likes with a falsy
    search result contribute nothing.

    Args:
        music_likes: iterable of mappings providing ``'name'`` and ``'id'``.

    Returns:
        ``(music_genres, full_music_cat)``: a genre -> occurrence-count dict
        and a list of ``{'fb_id', 'like_name', 'genres'}`` dicts, one per
        resolved like.
    """
    genre_counts = {}
    catalogue = []
    for liked in music_likes:
        match = text_analysis_freebase.search(liked['name'], "music")
        if not match:
            continue
        found_genres = match['genres']
        catalogue.append({
            'fb_id': liked['id'],
            'like_name': match['name'],
            'genres': found_genres,
        })
        for genre in found_genres:
            genre_counts[genre] = genre_counts.get(genre, 0) + 1
    return genre_counts, catalogue
def _collect_friend_actions(actions, data_key, search_kind, genre_weights,
                            friend, picture, cutoff, proc_posts, bucket):
    """Append recommendations built from one friend's actions to *bucket*.

    Args:
        actions: items taken from the friend's ``video.watches`` or
            ``music.listens`` data.
        data_key: key under ``action['data']`` that holds the shared object
            (``'movie'`` or ``'musician'``).
        search_kind: second argument for ``text_analysis_freebase.search``.
        genre_weights: the user's genre -> weight mapping for this media kind.
        friend: the friend's entry from the user's ``scores`` list.
        picture: picture URL read from the friend's Graph object.
        cutoff: timestamp string; actions whose ``publish_time`` does not
            compare greater than it are ignored.
        proc_posts: list of already-handled post ids; extended in place.
        bucket: recommendation list; extended in place.
    """
    for action in actions:
        published = action['publish_time']
        if data_key not in action['data'] or published <= cutoff:
            continue
        query = action['data'][data_key]['title']
        post_id = action['id']
        if post_id in proc_posts:
            continue
        proc_posts.append(post_id)

        entity = text_analysis_freebase.search(query, search_kind)
        if not entity:
            continue

        # Average weight of the entity's genres in the user's profile.
        # An empty genre list scores 0.0 instead of raising ZeroDivisionError.
        genres = entity['genres']
        matched = sum(genre_weights[gen] for gen in genres
                      if gen in genre_weights)
        affinity = float(matched) / len(genres) if genres else 0.0
        # NOTE(review): the original used the *movie* friend score for music
        # items as well. That is kept because the name of a music equivalent
        # is not visible from here - confirm against the scores schema.
        score_now = float(friend['movie_score']['movie_friend_score']) * affinity

        title, url, description = youtubeAPI.getVideo(entity)
        if not title:
            continue
        # A video is listed at most once, whichever post shared it. The
        # original test "(same post and same url) or same url" reduces to
        # this; its TODO left open whether post_id should matter too.
        if any(url == listed["embed"] for listed in bucket):
            continue
        bucket.append({
            "name": friend['friend_name'],
            "f_id": friend['facebook_id'],
            "post_id": post_id,
            "score": score_now,
            'picture': picture,
            'title': title,
            'embed': url,
            'description': description,
            'created': published[:10],
            'genres': entity['genres'],
            'rated': 0,
        })


def readStatusAndCreateLists(uid):
    """Build and store the ``movies10`` / ``music10`` lists for user *uid*.

    Loads the user document from ``recommendation_db.users`` on the local
    MongoDB, then for every friend in ``user['scores']`` with a truthy
    ``music_score`` or ``movie_score`` fetches links, statuses,
    ``video.watches`` and ``music.listens`` from the Graph API (links and
    statuses limited to the last 14 days). Watched movies and listened
    musicians are scored here; links and statuses are delegated to
    ``process_link`` / ``process_status``. Both lists are finally ordered by
    their ``'created'`` value, newest first, and written back to the user
    document together with ``has_lists`` and the updated ``proc_posts``.

    Args:
        uid: value of the ``id`` field of the user document.

    Returns:
        None. The result is the MongoDB update.
    """
    client = MongoClient('127.0.0.1')
    user_coll = client.recommendation_db.users
    movies10 = []
    music10 = []
    user = user_coll.find_one({"id": uid})
    graph = fb.GraphAPI(user['token'])
    scores = user['scores']
    music_genres = user['music_genres']
    movie_genres = user['movie_genres']
    proc_posts = user['proc_posts']
    cutoff = str(datetime.now() - timedelta(days=14))
    fields = {
        "fields": (
            'links.since(' + cutoff + ').fields('
            'comments.fields(id,from,message).limit(10),'
            'message,link,id,name,created_time,description),'
            'name,picture,'
            'statuses.since(' + cutoff + ').fields('
            'comments.fields(id,from,message).limit(10),'
            'message,id,updated_time),'
            'video.watches.fields(id,data,publish_time,message),'
            'music.listens.fields(id,data,publish_time,message)'
        )
    }

    for i, friend in enumerate(scores):
        if not friend['music_score'] and not friend['movie_score']:
            continue
        try:
            statuses = graph.get_object(friend['facebook_id'], **fields)
        except Exception:
            # Best effort: one unreachable friend must not abort the run.
            # "except Exception" (not a bare except) lets Ctrl-C through.
            print(str(friend['friend_name'].encode('utf8')) + " - Graph error\n")
            continue

        links_filtered = statuses['links']['data'] if 'links' in statuses else []
        statuses_filtered = (statuses['statuses']['data']
                             if 'statuses' in statuses else [])
        videos = (statuses['video.watches']['data']
                  if 'video.watches' in statuses else [])
        music = (statuses['music.listens']['data']
                 if 'music.listens' in statuses else [])
        picture = statuses['picture']['data']['url']

        _collect_friend_actions(videos, 'movie', "movie", movie_genres,
                                friend, picture, cutoff, proc_posts, movies10)
        # Music items are matched against the user's music genres. The
        # original looked them up in movie_genres, a copy-paste slip.
        _collect_friend_actions(music, 'musician', "music", music_genres,
                                friend, picture, cutoff, proc_posts, music10)

        args = {
            'i': i,
            'scores': scores,
            'movie': movie_genres,
            'music': music_genres,
            'pic': picture,
        }

        for link in links_filtered:
            if 'link' in link:
                proc_posts, movies10, music10 = process_link(
                    link, proc_posts, movies10, music10, args)

        for status in statuses_filtered:
            # NOTE(review): statuses without a message are skipped entirely;
            # confirm this matches the intended nesting of the final
            # process_status call.
            if 'message' not in status:
                continue
            message = status['message']
            try:
                lang = classify(message)
                if lang[0] != 'en':
                    continue
            except Exception as e:
                print(e)
                continue

            # The first word containing a link routes the status through
            # process_link; statuses without one go to process_status.
            link_word = next(
                (word for word in message.split(" ") if 'http://' in word),
                None)
            if link_word is None:
                proc_posts, movies10, music10 = process_status(
                    status, proc_posts, movies10, music10, args)
                continue
            link_buffer = {'id': status['id'], 'link': link_word}
            if 'updated_time' in status:
                link_buffer['created_time'] = status['updated_time']
            proc_posts, movies10, music10 = process_link(
                link_buffer, proc_posts, movies10, music10, args)

    # NOTE(review): sorting and the DB write are done once, after all friends
    # have been processed - confirm this is the intended placement.
    movies10 = sorted(movies10, key=lambda entry: entry['created'])[::-1]
    music10 = sorted(music10, key=lambda entry: entry['created'])[::-1]
    user_coll.update({"id": uid}, {"$set": {
        "movies10": movies10,
        "music10": music10,
        "has_lists": 1,
        "proc_posts": proc_posts,
    }})
def getUserHistory(user):
    """Fold the user's own recent Facebook activity into their taste profile.

    Fetches the user's links from the last 60 days and their
    ``video.watches`` actions (up to 100 each) from the Graph API. Every
    watched movie that ``text_analysis_freebase.search`` resolves increments
    the lower-cased genre counters in ``user['movie_genres']`` and is
    appended to ``user['movie_categories']``. The links are handed to
    ``processHistoryLink``; afterwards the total and likes scores are
    recomputed.

    Args:
        user: mapping with at least ``'token'``, ``'movie_genres'``,
            ``'music_genres'``, ``'movie_categories'`` and
            ``'music_categories'``. It is modified in place.

    Returns:
        The user mapping returned by ``processHistoryLink``, extended with
        ``'total_movie_score'``, ``'total_music_score'``,
        ``'movie_likes_score'`` and ``'music_likes_score'``.
    """
    token = user['token']
    since = str(datetime.now() - timedelta(days=60))
    graph = fb.GraphAPI(token)
    fields = {
        'fields': (
            'links.since(' + since + ').fields(link,id,name,created_time).limit(100),'
            'name,video.watches.fields(id,data,publish_time).limit(100)'
        )
    }
    my_wall = graph.get_object('me', **fields)
    my_posts = my_wall['links']['data'] if 'links' in my_wall else []
    my_movie_actions = (my_wall['video.watches']['data']
                        if 'video.watches' in my_wall else [])
    # NOTE: the music.listens history pass is deprecated and intentionally
    # not performed here.

    for video in my_movie_actions:
        if 'movie' not in video['data']:
            continue
        post_id = video['id']
        entity = text_analysis_freebase.search(
            video['data']['movie']['title'], "movie")
        if not entity:
            continue
        movie_genres = user['movie_genres']
        for gen in entity['genres']:
            key = gen.lower()
            movie_genres[key] = movie_genres.get(key, 0) + 1
        # The original guarded this append with "if title:", but no name
        # "title" is ever assigned in this function (NameError). A resolved
        # entity is the only precondition.
        user['movie_categories'].append({
            'like_name': entity['name'],
            'fb_id': post_id,
            'genres': entity['genres'],
        })

    user, _history_movie_entities, _history_music_entities = processHistoryLink(
        my_posts, user)

    # Total weight = sum of the individual genre weights. The original
    # accumulated with "+=" into names that were never initialised
    # (UnboundLocalError).
    user['total_movie_score'] = sum(user['movie_genres'].values())
    user['total_music_score'] = sum(user['music_genres'].values())
    user['movie_likes_score'] = getScoreSum(user['movie_categories'],
                                            user['movie_genres'])
    user['music_likes_score'] = getScoreSum(user['music_categories'],
                                            user['music_genres'])
    return user
def _collect_friend_actions(actions, data_key, search_kind, genre_weights,
                            friend, picture, cutoff, proc_posts, bucket):
    """Append recommendations built from one friend's actions to *bucket*.

    Args:
        actions: items taken from the friend's ``video.watches`` or
            ``music.listens`` data.
        data_key: key under ``action['data']`` that holds the shared object
            (``'movie'`` or ``'musician'``).
        search_kind: second argument for ``text_analysis_freebase.search``.
        genre_weights: the user's genre -> weight mapping for this media kind.
        friend: the friend's entry from the user's ``scores`` list.
        picture: picture URL read from the friend's Graph object.
        cutoff: timestamp string; actions whose ``publish_time`` does not
            compare greater than it are ignored.
        proc_posts: list of already-handled post ids; extended in place.
        bucket: recommendation list; extended in place.
    """
    for action in actions:
        published = action['publish_time']
        if data_key not in action['data'] or published <= cutoff:
            continue
        query = action['data'][data_key]['title']
        post_id = action['id']
        if post_id in proc_posts:
            continue
        proc_posts.append(post_id)

        entity = text_analysis_freebase.search(query, search_kind)
        if not entity:
            continue

        # Average weight of the entity's genres in the user's profile.
        # An empty genre list scores 0.0 instead of raising ZeroDivisionError.
        genres = entity['genres']
        matched = sum(genre_weights[gen] for gen in genres
                      if gen in genre_weights)
        affinity = float(matched) / len(genres) if genres else 0.0
        # NOTE(review): the original used the *movie* friend score for music
        # items as well. That is kept because the name of a music equivalent
        # is not visible from here - confirm against the scores schema.
        score_now = float(friend['movie_score']['movie_friend_score']) * affinity

        title, url, description = youtubeAPI.getVideo(entity)
        if not title:
            continue
        # A video is listed at most once, whichever post shared it. The
        # original test "(same post and same url) or same url" reduces to
        # this; its TODO left open whether post_id should matter too.
        if any(url == listed["embed"] for listed in bucket):
            continue
        bucket.append({
            "name": friend['friend_name'],
            "f_id": friend['facebook_id'],
            "post_id": post_id,
            "score": score_now,
            'picture': picture,
            'title': title,
            'embed': url,
            'description': description,
            'created': published[:10],
            'genres': entity['genres'],
            'rated': 0,
        })


def readStatusAndCreateLists(uid):
    """Build and store the ``movies10`` / ``music10`` lists for user *uid*.

    Loads the user document from ``recommendation_db.users`` on the local
    MongoDB, then for every friend in ``user['scores']`` with a truthy
    ``music_score`` or ``movie_score`` fetches links, statuses,
    ``video.watches`` and ``music.listens`` from the Graph API (links and
    statuses limited to the last 14 days). Watched movies and listened
    musicians are scored here; links and statuses are delegated to
    ``process_link`` / ``process_status``. Both lists are finally ordered by
    their ``'created'`` value, newest first, and written back to the user
    document together with ``has_lists`` and the updated ``proc_posts``.

    Args:
        uid: value of the ``id`` field of the user document.

    Returns:
        None. The result is the MongoDB update.
    """
    client = MongoClient('127.0.0.1')
    user_coll = client.recommendation_db.users
    movies10 = []
    music10 = []
    user = user_coll.find_one({"id": uid})
    graph = fb.GraphAPI(user['token'])
    scores = user['scores']
    music_genres = user['music_genres']
    movie_genres = user['movie_genres']
    proc_posts = user['proc_posts']
    cutoff = str(datetime.now() - timedelta(days=14))
    fields = {
        "fields": (
            'links.since(' + cutoff + ').fields('
            'comments.fields(id,from,message).limit(10),'
            'message,link,id,name,created_time,description),'
            'name,picture,'
            'statuses.since(' + cutoff + ').fields('
            'comments.fields(id,from,message).limit(10),'
            'message,id,updated_time),'
            'video.watches.fields(id,data,publish_time,message),'
            'music.listens.fields(id,data,publish_time,message)'
        )
    }

    for i, friend in enumerate(scores):
        if not friend['music_score'] and not friend['movie_score']:
            continue
        try:
            statuses = graph.get_object(friend['facebook_id'], **fields)
        except Exception:
            # Best effort: one unreachable friend must not abort the run.
            # "except Exception" (not a bare except) lets Ctrl-C through.
            print(str(friend['friend_name'].encode('utf8')) + " - Graph error\n")
            continue

        links_filtered = statuses['links']['data'] if 'links' in statuses else []
        statuses_filtered = (statuses['statuses']['data']
                             if 'statuses' in statuses else [])
        videos = (statuses['video.watches']['data']
                  if 'video.watches' in statuses else [])
        music = (statuses['music.listens']['data']
                 if 'music.listens' in statuses else [])
        picture = statuses['picture']['data']['url']

        _collect_friend_actions(videos, 'movie', "movie", movie_genres,
                                friend, picture, cutoff, proc_posts, movies10)
        # Music items are matched against the user's music genres. The
        # original looked them up in movie_genres, a copy-paste slip.
        _collect_friend_actions(music, 'musician', "music", music_genres,
                                friend, picture, cutoff, proc_posts, music10)

        args = {
            'i': i,
            'scores': scores,
            'movie': movie_genres,
            'music': music_genres,
            'pic': picture,
        }

        for link in links_filtered:
            if 'link' in link:
                proc_posts, movies10, music10 = process_link(
                    link, proc_posts, movies10, music10, args)

        for status in statuses_filtered:
            # NOTE(review): statuses without a message are skipped entirely;
            # confirm this matches the intended nesting of the final
            # process_status call.
            if 'message' not in status:
                continue
            message = status['message']
            try:
                lang = classify(message)
                if lang[0] != 'en':
                    continue
            except Exception as e:
                print(e)
                continue

            # The first word containing a link routes the status through
            # process_link; statuses without one go to process_status.
            link_word = next(
                (word for word in message.split(" ") if 'http://' in word),
                None)
            if link_word is None:
                proc_posts, movies10, music10 = process_status(
                    status, proc_posts, movies10, music10, args)
                continue
            link_buffer = {'id': status['id'], 'link': link_word}
            if 'updated_time' in status:
                link_buffer['created_time'] = status['updated_time']
            proc_posts, movies10, music10 = process_link(
                link_buffer, proc_posts, movies10, music10, args)

    # NOTE(review): sorting and the DB write are done once, after all friends
    # have been processed - confirm this is the intended placement.
    movies10 = sorted(movies10, key=lambda entry: entry['created'])[::-1]
    music10 = sorted(music10, key=lambda entry: entry['created'])[::-1]
    user_coll.update({"id": uid}, {"$set": {
        "movies10": movies10,
        "music10": music10,
        "has_lists": 1,
        "proc_posts": proc_posts,
    }})
def getUserHistory(user):
    """Fold the user's own recent Facebook activity into their taste profile.

    Fetches the user's links from the last 60 days and their
    ``video.watches`` actions (up to 100 each) from the Graph API. Every
    watched movie that ``text_analysis_freebase.search`` resolves increments
    the lower-cased genre counters in ``user['movie_genres']`` and is
    appended to ``user['movie_categories']``. The links are handed to
    ``processHistoryLink``; afterwards the total and likes scores are
    recomputed.

    Args:
        user: mapping with at least ``'token'``, ``'movie_genres'``,
            ``'music_genres'``, ``'movie_categories'`` and
            ``'music_categories'``. It is modified in place.

    Returns:
        The user mapping returned by ``processHistoryLink``, extended with
        ``'total_movie_score'``, ``'total_music_score'``,
        ``'movie_likes_score'`` and ``'music_likes_score'``.
    """
    token = user['token']
    since = str(datetime.now() - timedelta(days=60))
    graph = fb.GraphAPI(token)
    fields = {
        'fields': (
            'links.since(' + since + ').fields(link,id,name,created_time).limit(100),'
            'name,video.watches.fields(id,data,publish_time).limit(100)'
        )
    }
    my_wall = graph.get_object('me', **fields)
    my_posts = my_wall['links']['data'] if 'links' in my_wall else []
    my_movie_actions = (my_wall['video.watches']['data']
                        if 'video.watches' in my_wall else [])
    # NOTE: the music.listens history pass is deprecated and intentionally
    # not performed here.

    for video in my_movie_actions:
        if 'movie' not in video['data']:
            continue
        post_id = video['id']
        entity = text_analysis_freebase.search(
            video['data']['movie']['title'], "movie")
        if not entity:
            continue
        movie_genres = user['movie_genres']
        for gen in entity['genres']:
            key = gen.lower()
            movie_genres[key] = movie_genres.get(key, 0) + 1
        # The original guarded this append with "if title:", but no name
        # "title" is ever assigned in this function (NameError). A resolved
        # entity is the only precondition.
        user['movie_categories'].append({
            'like_name': entity['name'],
            'fb_id': post_id,
            'genres': entity['genres'],
        })

    user, _history_movie_entities, _history_music_entities = processHistoryLink(
        my_posts, user)

    # Total weight = sum of the individual genre weights. The original
    # accumulated with "+=" into names that were never initialised
    # (UnboundLocalError).
    user['total_movie_score'] = sum(user['movie_genres'].values())
    user['total_music_score'] = sum(user['music_genres'].values())
    user['movie_likes_score'] = getScoreSum(user['movie_categories'],
                                            user['movie_genres'])
    user['music_likes_score'] = getScoreSum(user['music_categories'],
                                            user['music_genres'])
    return user