def generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity(model_ids, startTime, endTime, outputFolder): def location_similarity(location_vector_1, location_vector_2): return reduce(lambda total, k: total+(location_vector_1.get(k,0)*location_vector_2.get(k,0)), set(location_vector_1.keys()).union(location_vector_2.keys()),0.) influence_types=[InfluenceMeasuringModels.TYPE_COMPLETE_INFLUENCE, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE] for model_id in model_ids: mf_location_to_mf_influence_type_to_influence_vector = dict(Experiments.load_tuo_location_and_mf_influence_type_to_influence_vector(model_id)) GeneralMethods.runCommand('rm -rf %s'%tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id) for line_count, location_object in enumerate(iterateJsonFromFile( location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d')) )): print line_count location = location_object['id'] tuo_neighbor_location_and_mf_influence_type_and_similarity = [] for neighbor_location in location_object['links'].keys(): mf_influence_type_and_similarity = {} for influence_type in influence_types: similarity = location_similarity( mf_location_to_mf_influence_type_to_influence_vector[location][influence_type], mf_location_to_mf_influence_type_to_influence_vector[neighbor_location][influence_type] ) mf_influence_type_and_similarity[influence_type] = similarity so_hashtags_for_location = set(location_object['hashtags'].keys()) so_hashtags_for_neighbor_location = set(location_object['links'][neighbor_location].keys()) numerator = len(so_hashtags_for_location.intersection(so_hashtags_for_neighbor_location)) + 0. denominator = len(so_hashtags_for_location.union(so_hashtags_for_neighbor_location)) + 0. 
mf_influence_type_and_similarity[JACCARD_SIMILARITY] = numerator/denominator tuo_neighbor_location_and_mf_influence_type_and_similarity.append([neighbor_location, mf_influence_type_and_similarity]) FileIO.writeToFileAsJson( [location, tuo_neighbor_location_and_mf_influence_type_and_similarity], tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id )
def generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag): for model_id in models_ids: # if w_extra_hashtags: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag) # else: output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, wout_extra_hashtags_tag) output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag) GeneralMethods.runCommand('rm -rf %s'%output_file) for line_count, location_object in enumerate(iterateJsonFromFile( location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d')) )): print line_count, model_id tuo_neighbor_location_and_pure_influence_score = [] location_hashtag_set = set(location_object['hashtags']) for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems(): pure_influence_scores = [] for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems(): if hashtag in location_object['hashtags']: location_occurrences = location_object['hashtags'][hashtag][0] pure_influence_scores.append(MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences)) neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys()) if hashtag_tag==w_extra_hashtags_tag: for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set): pure_influence_scores.append(1.0) for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set): pure_influence_scores.append(-1.0) mean_pure_influence_score = np.mean(pure_influence_scores) tuo_neighbor_location_and_pure_influence_score.append([neighbor_location, mean_pure_influence_score]) tuo_neighbor_location_and_pure_influence_score = sorted(tuo_neighbor_location_and_pure_influence_score, 
key=itemgetter(1)) FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_pure_influence_score], output_file)
def load_ltuo_hashtag_and_ltuo_location_and_occurrence_time(startTime=datetime(2012, 1, 1), endTime=datetime(2012, 3, 31), outputFolder='complete_prop'):
    '''
    Load hashtag objects for the given window and return a list of
    [hashtag, [(lattice_lid, occurrence_time), ...]] entries, where each
    occurrence point is collapsed to a lattice id at LOCATION_ACCURACY.
    '''
    input_file = f_hashtag_objects % (outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
    ltuo_hashtag_and_ltuo_location_and_occurrence_time = []
    for hashtag_object in iterateJsonFromFile(input_file):
        ltuo_location_and_occurrence_time = [
            (getLatticeLid(point, LOCATION_ACCURACY), occurrence_time)
            for point, occurrence_time in hashtag_object['oc']
        ]
        ltuo_hashtag_and_ltuo_location_and_occurrence_time.append(
            [hashtag_object['h'], ltuo_location_and_occurrence_time])
    return ltuo_hashtag_and_ltuo_location_and_occurrence_time
def generate_tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score(model_ids, startTime, endTime, outputFolder): for model_id in model_ids: for line_count, location_object in enumerate(iterateJsonFromFile( location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d')) )): print line_count mf_from_neighbor_location_to_sharing_affinity_score = {} so_hashtags = set(location_object['hashtags']) for neighbor_location, neighbor_hashtags in location_object['links'].iteritems(): so_neighbor_hashtags = set(neighbor_hashtags) mf_from_neighbor_location_to_sharing_affinity_score[neighbor_location]=len(so_hashtags.intersection(so_neighbor_hashtags))/float(len(so_hashtags)) FileIO.writeToFileAsJson([ location_object['id'], sorted(mf_from_neighbor_location_to_sharing_affinity_score.iteritems(), key=itemgetter(1), reverse=True) ], tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score_file%model_id)
def generate_tuo_location_and_tuo_neighbor_location_and_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
    '''
    For every location, compute a weighted mean influence score against each
    linked neighbor under every model in models_ids and write
    [location, [[neighbor, weighted_score], ...]] (sorted ascending by score)
    as JSON to the per-(model, tag) output file, which is removed first.

    Per-hashtag scores are weighted by the hashtag's occurrence share at the
    source (or neighbor, for neighbor-only hashtags), and each neighbor's mean
    is further scaled by the fraction of the source's hashtags it shares.
    '''
    def get_hashtag_weights(map_from_hashtag_to_tuples_of_occurrences_and_time_range):
        # Weight of a hashtag = its occurrence count / total occurrences in the map.
        total_occurrences = sum([len(occurrences) for hashtag, (occurrences, time_range) in map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()]) + 0.
        return dict([(hashtag, len(occurrences)/total_occurrences) for hashtag, (occurrences, time_range) in map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()])
    def get_location_weights(hashtags_for_source_location, map_from_location_to_hashtags):
        # Weight of a neighbor = |hashtags shared with source| / |source hashtags|.
        set_of_hashtags_for_source_location = set(hashtags_for_source_location.keys())
        return dict([(location, len(set(hashtags.keys()).intersection(set_of_hashtags_for_source_location))/(len(set_of_hashtags_for_source_location)+0.)) for location, hashtags in map_from_location_to_hashtags.iteritems()])
    for model_id in models_ids:
        output_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file%(model_id, hashtag_tag)
        # Start from a clean file: records below are appended one location at a time.
        GeneralMethods.runCommand('rm -rf %s'%output_file)
        for line_count, location_object in enumerate(iterateJsonFromFile(
                location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
        )):
            print line_count, model_id
            tuo_neighbor_location_and_influence_score = []
            mf_hashtag_to_hashtag_weights = get_hashtag_weights(location_object['hashtags'])
            mf_location_to_location_weights = get_location_weights(location_object['hashtags'], location_object['links'])
            location_hashtag_set = set(location_object['hashtags'])
            for neighbor_location, mf_hashtag_to_tuo_occurrences_and_time_range in location_object['links'].iteritems():
                influence_scores = []
                mf_neighbor_location_hashtag_to_hashtag_weights = get_hashtag_weights(mf_hashtag_to_tuo_occurrences_and_time_range)
                neighbor_location_hashtag_set = set(mf_hashtag_to_tuo_occurrences_and_time_range.keys())
                # Shared hashtags: model score weighted by source-side hashtag weight.
                for hashtag, (neighbor_location_occurrences, time_range) in mf_hashtag_to_tuo_occurrences_and_time_range.iteritems():
                    if hashtag in location_object['hashtags']:
                        location_occurrences = location_object['hashtags'][hashtag][0]
                        pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences)
                        influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*pure_influence_score)
                if hashtag_tag==w_extra_hashtags_tag:
                    # Source-only hashtags count as +1, neighbor-only as -1,
                    # each weighted by the owning side's hashtag weight.
                    for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set):
                        influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*1.0)
                    for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set):
                        influence_scores.append(mf_neighbor_location_hashtag_to_hashtag_weights[hashtag]*-1.0)
                # NOTE(review): influence_scores may be empty here (no shared
                # hashtags, extra hashtags not counted) and np.mean yields nan —
                # confirm upstream guarantees at least one shared hashtag per link.
                mean_influence_scores = np.mean(influence_scores)
                tuo_neighbor_location_and_influence_score.append([neighbor_location, mf_location_to_location_weights[neighbor_location]*mean_influence_scores])
            tuo_neighbor_location_and_influence_score = sorted(tuo_neighbor_location_and_influence_score, key=itemgetter(1))
            FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_influence_score], output_file)
def load_ltuo_test_hashtag_and_ltuo_location_and_pure_influence_score(test_model_id):
    '''
    Load (hashtag, ltuo_location_and_pure_influence_score) tuples for the given
    test model from its JSON results file.
    '''
    input_file = f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score % (test_model_id)
    result = []
    for hashtag, ltuo_location_and_pure_influence_score in iterateJsonFromFile(input_file):
        # Unpack-and-repack turns each 2-element JSON list into a tuple.
        result.append((hashtag, ltuo_location_and_pure_influence_score))
    return result
def load_ltuo_location_and_no_of_occurrences(START_TIME=datetime(2011, 5, 1), END_TIME=datetime(2011, 12, 31), WINDOW_OUTPUT_FOLDER='complete_prop'):
    '''
    Return [location, no_of_occurrences] pairs for the given window, where the
    count is the number of (hashtag, occurrence_time) entries at that location.
    '''
    input_file = f_ltuo_location_and_ltuo_hashtag_and_occurrence_time % (
        WINDOW_OUTPUT_FOLDER, START_TIME.strftime('%Y-%m-%d'), END_TIME.strftime('%Y-%m-%d'))
    return [
        [location, len(ltuo_hashtag_and_occurrence_time)]
        for location, ltuo_hashtag_and_occurrence_time in iterateJsonFromFile(input_file)
    ]
def load_tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score(model_id):
    '''
    Load (location, tuo_neighbor_location_and_sharing_affinity_score) tuples for
    the given model from its JSON results file.
    '''
    input_file = tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score_file % model_id
    result = []
    for location, tuo_neighbor_location_and_sharing_affinity_score in iterateJsonFromFile(input_file):
        # Unpack-and-repack turns each 2-element JSON list into a tuple.
        result.append((location, tuo_neighbor_location_and_sharing_affinity_score))
    return result
def load_tuo_location_and_tuo_neighbor_location_and_influence_score(model_id, hashtag_tag):
    '''
    Load (location, tuo_neighbor_location_and_influence_score) tuples for the
    given model and hashtag tag from their JSON results file.
    '''
    input_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file % (model_id, hashtag_tag)
    result = []
    for location, tuo_neighbor_location_and_influence_score in iterateJsonFromFile(input_file):
        # Unpack-and-repack turns each 2-element JSON list into a tuple.
        result.append((location, tuo_neighbor_location_and_influence_score))
    return result