def main():
    """Interactive entry point for the nonogram solver.

    Prompts for the puzzle source ('y' = use the already-loaded
    nonogram, 'n' = type in a new one, 'f' = load a result file),
    runs solver iterations until a pass makes no further progress,
    then reports whether the nonogram was solved.
    """
    print("Welcome to my nonogram solver program!")
    print(
        "Use existant nonogram or type in new nonogram?\nexsistant: 'y', new: 'n', result from file: 'f'"
    )
    user_choice = input()
    # Re-prompt until a recognized option is entered.
    while user_choice not in ('y', 'n', 'f'):
        print("enter 'y', 'n' or 'f'")
        user_choice = input()

    if user_choice == 'n':
        data.get_user_data()
    elif user_choice == 'f':
        print("enter result file path: ")
        data.read_res_from_file(input())

    # Keep iterating while each pass still improves the board.
    progressed = True
    while progressed:
        progressed = move.iteration()
        print("\n\n\nafter iteration:\n")
        data.print_nonogram()

    if tools.nonogram_was_solved():
        print("finished successfully!")
        data.print_nonogram()
        data.save_res_to_file()
    else:
        print("not succeeded finishing nonogram")
        data.print_nonogram()
def get_features(filename, min_count=10):
    """Build per-user feature tables from a tab-separated data file.

    Reads the raw dataframe, builds one dataframe per user (via
    get_user_data), drops feature columns that are entirely empty for a
    user, drops features observed min_count times or fewer, writes each
    user's pruned table to ../data/labels/<user>.csv, and returns the
    tables together with per-feature non-null counts.

    Parameters
    ----------
    filename : str
        Path to the tab-separated, utf-8 encoded input file.
    min_count : int, optional
        Features with this many non-null values or fewer are removed
        (default 10, matching the previous hard-coded threshold).

    Returns
    -------
    (labels, counts) : (dict, dict)
        labels maps user -> feature DataFrame; counts maps
        user -> Series of non-null counts per remaining feature column.
    """
    df = pd.read_csv(filename, sep='\t', encoding='utf-8')

    # Create one dataset per user.
    labels = {user: get_user_data(df, user) for user in usernames}

    counts = {}
    for user in usernames:
        frame = labels[user]

        # Drop features that are empty for this user. Collect the names
        # first, then drop in one call: removing columns while iterating
        # the frame (as the old code did) mutates it mid-iteration.
        empty = [col for col in frame if frame[col].isnull().all()]
        frame.drop(columns=empty, inplace=True)

        # Remove features with too few observations.
        feature_counts = _feature_counts(frame)
        to_remove = feature_counts[feature_counts <= min_count].index
        frame.drop(columns=to_remove, inplace=True)

        # Recreate the counts on the pruned frame.
        counts[user] = _feature_counts(frame)

        path = '../data/labels/' + user + '.csv'
        frame.to_csv(path_or_buf=path, sep='\t', index=False,
                     encoding='utf-8')
    return labels, counts


def _feature_counts(frame):
    """Return non-null counts per feature column, excluding metadata
    columns (likes/username/display_src/hour/day)."""
    feature_counts = frame.count(axis=0)
    # Same columns the original removed one drop-call at a time; Series.drop
    # still raises KeyError if one is missing, matching prior behavior.
    feature_counts.drop(['likes', 'username', 'display_src', 'hour', 'day'],
                        inplace=True)
    return feature_counts
def thinking_about_this_stuff():
    """Scratchpad: fetch a target user's follower graph from twitter and
    run Louvain community detection until the partition stabilizes."""
    import networkx as nx
    import community

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'

    apis = oauth_login(api_path)
    target, target_tweets, followers, following, user_lists = \
        get_user_data(apis[0], screen_name)

    # Build an undirected graph from the follower edge list.
    graph = nx.Graph(data=make_graph(target['id'], followers, apis))

    # Iterate Louvain partitioning, feeding each result back in, until
    # two consecutive partitions agree (a fixed point).
    prev = community.best_partition(graph)
    curr = community.best_partition(graph, partition=prev)
    while prev != curr:
        prev = community.best_partition(graph, partition=curr)
        curr = community.best_partition(graph, partition=prev)

    # Group nodes by their final community id.
    by_community = {}
    for node, cid in curr.items():
        by_community.setdefault(cid, []).append(node)
    partitions = list(by_community.values())
def load(screen_name=None, user_id=None, force_db_update=False,
         force_twitter_update=False, debug=False):
    '''
    Main entry point into gravitty module. Should be used by importing
    gravitty and calling gravitty.load('<your_screen_name>'). Please see
    the readme at github.com/ericjeske/gravitty for mandatory setup
    instructions and api requirements.

    The load function will make every attempt to load data from cache
    sources (mongoDB) before using twitter's api. It is, however,
    suggested that multiple twitter api keys are utilized with this app
    to avoid rate limiting restrictions.

    By default, running this function will return a json object that can
    be parsed by d3.js to create a community graph. Additional
    information, including the raw twitter data, parsed twitter data,
    user similarity, community clustering dendrogram, community
    analytics data, community networkx graph, and community json object,
    can be returned by passing in debug=True.

    Also, by default, this app will create two pickled objects, one
    containing the debug data described above, the other containing the
    community json file. Subsequent calls for the same user will use
    this data to save time (and api calls). To override the use of
    pickled data, use force_db_update=True.

    Data for each follower will be pulled from mongoDB if possible,
    otherwise it will be pulled from twitter. To do a clean-slate
    download, downloading everything from twitter, use
    force_twitter_update=True.

    Raises Exception('DBError') when mongod is unreachable and
    Exception('TargetError') when the target user's data is unavailable.
    '''
    # NOTE(review): `== None` should be `is None`; left unchanged here.
    if screen_name == None and user_id == None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created.
    if screen_name is not None:
        ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
        sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
        sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

        # Check to see if there are pickles for the user. Note that this
        # will be overriden if force_db_update is set to true.
        if os.path.isfile(sn_file_debug) and debug \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file_debug, 'rb'))

        if os.path.isfile(sn_file) \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file, 'rb'))

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in.
    try:
        conn = pymongo.MongoClient("localhost", 27017)
    except pymongo.errors.ConnectionFailure:
        print 'Please run mongod and re-run program'
        raise Exception('DBError')

    db = conn[DB_NAME]

    # Get the target user's data from either the screen_name or user_id.
    user_data = get_user_data(db, apis[0], name=screen_name,
                              uid=user_id, force=force_twitter_update)

    # If the user is protected (or has more than the maximum
    # followers/friends), then return an error.
    if user_data == None:
        print 'Was unable to access data for %s / %s' % (screen_name, user_id)
        raise Exception('TargetError')

    user_info, user_tweets, followers, following, user_lists = user_data

    # Using the target user's list of followers (user ids), get the same
    # information we just got for the target user for each of its
    # followers.
    raw_df = get_follower_data(db, apis, followers,
                               force=force_twitter_update)

    # Filter the dataframe for inactive users. Then parse the raw
    # dataframe to extract the relevant features from the raw data.
    df = parse_dataframe( filter_dataframe(raw_df) )

    # With the features in hand, calculate the latent similarity between
    # each set of users. See similarity.py for more detail on the
    # calculations of this similarity metric. The resulting dataframe
    # will be a square matrix indexed/columned by user_id and contain
    # the undirected edge weights between each pair of users.
    df_similarity = make_similarity_dataframe(df)

    # Make an undirected graph representing the relationship between
    # each user, if any. Each node ID is the user ID, each edge weight
    # is equal to the similarity score between those two users.
    graph = make_graph(df, df_similarity)

    # Using the louvain method, find communities within the weighted
    # graph. The returned dendrogram is a list of dictionaries where the
    # values of each dictionary are the keys of the next dictionary. The
    # length of the dendrogram indicates the number of levels of
    # community clusters detected.
    dendrogram = generate_dendrogram(graph)

    # Add a final mapping to the dendrogram that maps everyone into the
    # same community. They are, after all, followers of the same user.
    dendrogram.append({k:0 for k in dendrogram[-1].values()})

    # Modify the dataframe to contain columns titled 'cid + <level>'.
    # Each column contains the community id's for that level for each
    # user. Also, this is a convenient time to calculate graph
    # modularity at each level so produce that here as well.
    df, modularity = get_community_assignment(df, graph, dendrogram)

    num_levels = len(dendrogram)

    # For each community at each level of the dendrogram, find the
    # topics, sentiment, biggest influencers, etc. for each.
    data = get_community_analytics(df, graph, num_levels,
                                   community_modularity=modularity)

    # Both the mentioned and most connected users fields from the
    # community analytics function are user ids. Turn them into screen
    # names.
    data = get_screen_names(data, 'mentioned', df, db, apis[0])
    data = get_screen_names(data, 'most_connected', df, db, apis[0])

    # Close the database connection. It is no longer needed.
    conn.close()

    # Create a networkx graph where each node represents a community.
    # Edges represent membership into larger communities at the next
    # level up (down?) the dendrogram and have no edge weights. The data
    # obtained in the previous steps from community_analytics is loaded
    # into the attributes of each node.
    community_graph = create_community_graph(data, dendrogram)

    # Parse this graph into a json representation for use & consumption
    # by d3.js.
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    # Pickle the objects for reuse.
    # NOTE(review): open() handles here and in the loads above are never
    # closed explicitly — consider `with open(...)`; left unchanged.
    ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
    sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
    sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

    pickle.dump((raw_df, df, df_similarity, dendrogram, data,
                 community_graph, community_json),
                open(sn_file_debug, 'wb'))
    pickle.dump(community_json, open(sn_file, 'wb'))

    # If debug is true, return all of the precusor objects along with
    # the json.
    if debug:
        return (raw_df, df, df_similarity, dendrogram, data,
                community_graph, community_json)

    # Otherwise return the json object.
    return community_json
def main():
    """Interactive nonogram solver entry point.

    Asks the user where the puzzle comes from ('y' = already-loaded
    nonogram, 'n' = type in a new one, 'f' = load a result file), then
    alternates between cheap non-recursive left/right passes and the
    heavier move.iteration() passes until the nonogram is solved,
    printing progress and timing along the way.
    """
    print("Welcome to my nonogram solver program!")
    print("Use existant nonogram or type in new nonogram?\nexsistant: 'y', new: 'n', result from file: 'f'")
    choise = input()
    # Re-prompt until one of the recognized options is entered.
    while choise not in ['y','n','f']:
        print("enter 'y', 'n' or 'f'")
        choise = input()
    if choise == 'n':
        data.get_user_data()
    elif choise == 'f':
        print("enter result file path: ")
        path = input()
        data.read_res_from_file(path)

    has_improvement = True
    begin = time.time()
    # Outer loop: alternate the two strategies until the board is
    # complete. NOTE(review): if neither strategy ever reports an
    # improvement on an unsolvable board this loops forever — confirm.
    while not tools.nonogram_was_solved():
        #has_improvement = False
        ic = 0
        # Phase 1: repeat the cheap left/right passes (no recursion)
        # while they keep improving the board.
        while (has_improvement):
            has_improvement = left_right.iteration() # without recursion
            print("after ", ic, "'th iteration:")
            data.print_nonogram()
            ic+=1
        #for i in range(data.ROWS):
        #    data.ROWS_HAS_CHANGE[i] = True # is there a difference from the previous round
        #    data.COLUMNS_HAS_CHANGE[i] = True # --"--
        if tools.nonogram_was_solved():
            break
        print("after basic iteration (without recursion): ")
        data.print_nonogram()
        ic = 0
        # Phase 2: run the heavier move.iteration() passes until one
        # makes progress, then fall back to phase 1.
        while not has_improvement: # (has_improvement or (True in ROWS_HAS_CHANGE) or (True in COLUMNS_HAS_CHANGE)):
            print("working on " + str(ic) + "'th iteration...")
            rec_1_begin = time.time()
            has_improvement = move.iteration() #perm.iteration()
            rec_1_end = time.time()
            print("after " + str(ic) + "'th iteration:")
            print("iteration " + str(ic) + " took " + str(round(rec_1_end - rec_1_begin, 3)))
            print("has_improvement:", has_improvement)
            print("ROWS_HAS_CHANGE:", data.ROWS_HAS_CHANGE)
            print("COLUMNS_HAS_CHANGE:", data.COLUMNS_HAS_CHANGE)
            data.print_nonogram()
            ic+=1
    end = time.time()

    # Check if the nonogram was finished and report/persist the result.
    if (tools.nonogram_was_solved()):
        print("finished successfully!")
        data.print_nonogram()
        data.save_res_to_file()
    else:
        print("not succeeded finishing nonogram")
        data.print_nonogram()
    # print("enters to rec: " + str(perm.REC_COUNTER[0]))
    print("total time took is: " + str(round(end - begin, 3)))
# -*- coding: utf-8 -*- from utils import oauth_login from data import get_user_data, get_follower_data #from make_graph import make_graph api_path = '../api_keys/' screen_name = 'ZipfianAcademy' user_id = 1244850380 #screen_name = 'graphlabteam' apis = oauth_login(api_path)[3:] target, target_tweets, followers, following, user_lists = \ get_user_data(apis[2], screen_name=screen_name, user_id=user_id) print target['id'] ind = followers.index(562363) + 1 data = get_follower_data(apis, followers[ind:]) #g = make_graph(target['id'], followers, apis) def thinking_about_this_stuff(): import networkx as nx import community #from make_graph import make_graph api_path = '../api_keys/' screen_name = 'graphlabteam' apis = oauth_login(api_path)