# create empty graph
G = nx.DiGraph()

# if the domain and topic are in the item
if (domain == item.domain and topic == item.topic):
    # add to the graph
    edge_list.append((item.author, item.connection, int(item.weight)))
    check = 1

#TODO new way to check if a graph isn't empty
if check == 1:
    # populate the graph
    G.add_weighted_edges_from(edge_list)

    ### generate metrics
    ### degree
    inf.run_metric('Degree', G, domain, topic, metric_weight, use_norm, fileout, top_x)
    ### in degree
    inf.run_metric('In Degree', G, domain, topic, metric_weight, use_norm, fileout, top_x)
    ### out degree
    inf.run_metric('Out Degree', G, domain, topic, metric_weight, use_norm, fileout, top_x)
    ### closeness
    inf.run_metric('Closeness Centrality', G, domain, topic, metric_weight, use_norm, fileout, top_x)
    ### betweenness
    inf.run_metric('Betweenness Centrality', G, domain, topic, metric_weight, use_norm, fileout, top_x)
    ### eigenvector
    inf.run_metric('Eigenvector Centrality', G, domain, topic, metric_weight, use_norm, fileout, top_x)
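
# A minimal sketch of the emptiness check the TODO above asks for (an
# assumption, not the project's code): a non-empty edge list is truthy, and
# the populated graph can be tested directly, so no `check` flag is needed.
G = nx.DiGraph()
if edge_list:
    G.add_weighted_edges_from(edge_list)
if G.number_of_edges() > 0:
    inf.run_metric('Degree', G, domain, topic, metric_weight, use_norm, fileout, top_x)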
def centrality():
    start_time = datetime.now()
    #TODO add config file read
    #TODO support cross network calculations (author_node --is--> author_node)

    ## >Get the REQUIRED parameters
    req_params = {}
    for entry in req_param_list:
        if request.args.get(entry) is not None:
            req_params[entry] = urllib2.unquote(request.args.get(entry)).replace("'", "")
        else:
            ret_string = "Required parameter missing: " + entry
            return jsonify(result=ret_string)

    #TODO Validate start_date, end_date
    ## >Verify the metric is valid
    if req_params["metric"] not in metric_list:
        return jsonify(result="Invalid metric requested")

    ## >Verify the start date is before the end date
    if int(req_params["start_date"]) > int(req_params["end_date"]):
        return jsonify(result="End date before start date")

    ## >Get the OPTIONAL parameters
    opt_params = {}
    for entry in opt_param_list:
        if request.args.get(entry) is not None:
            opt_params[entry] = urllib2.unquote(request.args.get(entry)).replace("'", "")
        else:
            opt_params[entry] = None

    #TODO validate the optional parameters
    ## >Get the FORMAT parameters
    for_params = {}
    for entry in format_param_list:
        if request.args.get(entry) is not None:
            for_params[entry] = urllib2.unquote(request.args.get(entry)).replace("'", "")
        else:
            for_params[entry] = None

    params = dict(req_params.items() + opt_params.items() + for_params.items())

    ## >Build the mongo query
    mongo_query = {}
    mongo_query["PostDate"] = {"$gte": params["start_date"], "$lte": params["end_date"]}
    mongo_query["Network"] = params["network"]
    for param, value in opt_params.iteritems():
        if value is not None:
            if param == "type":
                mongo_query["Type"] = opt_params["type"]
            if param == "twit_collect":
                mongo_query["Meta.sources"] = {"$in": [opt_params["twit_collect"]]}
            if param == "matched_project":
                mongo_query["Matching"] = {"$elemMatch": {"ProjectId": opt_params["matched_project"]}}
            if param == "matched_topic":
                #TODO
                pass
            if param == "scored_project":
                #TODO
                pass
            if param == "scored_topic":
                #TODO
                pass

    ## >Check if there are any matches
    if author_collection.find(mongo_query).count() == 0:
        return "No connections found matching the criteria"
    else:
        ## >Map/reduce the A-->A connections
        a2a_map = Code(
            """
            function () {
                emit({"author": this.Author, "connection": this.Connection},
                     {"count": 1});
            }
            """
        )
        a2a_reduce = Code(
            """
            function (key, values) {
                var count = 0;
                values.forEach(function(v) {
                    count += v['count'];
                });
                return {"count": count};
            }
            """
        )
        a2a_result = author_collection.map_reduce(a2a_map, a2a_reduce, "a2a_results", query=mongo_query).find()

        ## >Build the author list
        author_list = []
        for a2a_count in a2a_result:
            author_list.append(
                (
                    a2a_count["_id"]["author"].replace("&", "/x26"),
                    a2a_count["_id"]["connection"].replace("&", "/x26"),
                    int(a2a_count["value"]["count"]),
                )
            )

        ## >Influence Calculations
        if len(author_list) > 0:
            ## >Create a blank graph
            G = nx.DiGraph()
            ## >Add the edges to the graph
            G.add_weighted_edges_from(author_list)
            ## >Run the requested metric on the graph 'G'
            calc_metric, stats = inf.run_metric(params["metric"], G, "weight", True)
        else:
            return jsonify(result="Parameters produced no graph/metrics")

        ## >Build the dictionary to return
        data_results = {}
        ## >Append the metric data
        data_results["metrics"] = calc_metric

        ## >If graph requested
        if for_params["return_graph"] is not None:
            if for_params["return_graph"].lower() == "true":
                ## >If format = data
                if for_params["format"] is None:
                    ## >Append the graph data
                    data_results["graph"] = nx.to_edgelist(G, nodelist=None)
                ## >If format = graphml
                elif for_params["format"].lower() == "graphml":
                    ## >Create the graphml filename
                    graphml_name = inf_sup.create_filename(params)
                    ## >Get the graphml data
                    graphml_data = "\n".join(nx.generate_graphml(G))
                    ## >Add the versioning
                    graphml_final = '<?xml version="1.0" encoding="UTF-8"?>' + "\n"
                    h = HTMLParser.HTMLParser()
                    for line in graphml_data.split("\n"):
                        ## >Unescape the html content
                        line = h.unescape(line)
                        ## >For each node add appropriate metric data into the graphml
                        if '<node id="' in line:
                            graphml_final += line.replace("/>", ">") + "\n"
                            node_name = line.partition('"')[-1].rpartition('"')[0]
                            graphml_final += '      <data key="d1">' + str(calc_metric[node_name]) + "</data>" + "\n"
                            graphml_final += "    </node>" + "\n"
                        else:
                            graphml_final += line + "\n"
                        ## >Add the key for the metric attribute
                        if "<key" in line:
                            graphml_final += '  <key attr.name="' + params["metric"] + '" attr.type="float" for="node" id="d1" />'

                    if app.debug is True:
                        ## >Write out the graphml for testing
                        with open(graphml_name, "w") as output_file:
                            for line in graphml_final:
                                output_file.write(line.encode("utf-8"))
                            if not output_file.closed:
                                output_file.close()

                    ## >Create the appropriate response to return the graphml
                    response = make_response(graphml_final)
                    response.headers["Content-Type"] = "text/xml"
                    response.headers["Content-Disposition"] = "attachment; filename=%s" % (graphml_name,)
                    return response

        if app.debug is True:
            ## >If debug mode add the query parameters
            data_results["query"] = params
            ## >And add statistics about the process
            statistics = {}
            statistics["runtime"] = str(datetime.now() - start_time)
            data_results["stats"] = statistics
            ## >Add the mongo query used
            data_results["query"] = mongo_query

        return jsonify(result=data_results)
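
# Hypothetical client call for the endpoint above (route path, host, and port
# are assumptions; the parameter names come from the handler itself: `metric`,
# `start_date`, `end_date`, and `network` appear to be required, while
# `return_graph` and `format` control the response shape).
import urllib2

url = ('http://localhost:5000/centrality'
       '?metric=pagerank&start_date=20140101&end_date=20140201'
       '&network=Twitter&return_graph=true&format=graphml')
print urllib2.urlopen(url).read()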
def centrality():
    """
    Centrality metric endpoint.

    Custom error code(s):
        557: 'Calculation did not converge'
    """
    # Get the REQUIRED parameters
    req_params = get_params(request, req_param_list)
    # Get the OPTIONAL parameters
    opt_params = get_params(request, opt_param_list)
    print opt_params

    # Build the mongo query
    mongo_query = build_mongo_query(req_params, opt_params)

    # Check if there are any matches
    if author_collection.find(mongo_query).count() == 0:
        raise_error('No connections found matching the criteria', 416)
    else:
        # Map/reduce the A-->A connections
        a2a_map = Code("""
            function () {
                emit({"author": this.Author, "connection": this.Connection},
                     {"count": 1});
            }
            """)
        a2a_reduce = Code("""
            function (key, values) {
                var count = 0;
                values.forEach(function(v) {
                    count += v['count'];
                });
                return {"count": count};
            }
            """)

        # Create a unique collection based on this query
        query_collection = str(uuid4())
        try:
            a2a_result = author_collection.map_reduce(a2a_map, a2a_reduce, query_collection, query=mongo_query).find()
        except Exception as e:
            raise_error(str(e), 503)

        # Build the author list
        author_list = []
        for a2a_count in a2a_result:
            # Escape ampersands for downstream XML/GraphML output
            con_author = a2a_count['_id']['author'].replace('&', '&amp;')
            con_connect = a2a_count['_id']['connection'].replace('&', '&amp;')
            if (len(con_author) > 0) and (len(con_connect) > 0):
                author_list.append((con_author, con_connect, int(a2a_count['value']['count'])))

        # Delete the collection based on this query
        mongo_db[query_collection].drop()

        # Influence Calculations
        #TODO need to break this out into a function
        if len(author_list) > 0:
            # Create a blank graph
            G = nx.DiGraph()
            # Add the edges to the graph
            G.add_weighted_edges_from(author_list)

            # Run the requested metric on the graph 'G'
            #TODO fix eigenvector formatting
            if req_params['metric'] == 'eigenvector':
                raise_error('Eigenvector currently not available', 501)

            if opt_params['normalized'] is not None:
                if opt_params['normalized'] == 'true':
                    make_normal = True
                elif opt_params['normalized'] == 'false':
                    make_normal = False
                else:
                    raise_error('Invalid normalized parameter: ' + opt_params['normalized'], 400)
            elif 'degree' in req_params['metric']:
                make_normal = False
            else:
                make_normal = True

            calc_metric, stats = inf.run_metric(req_params['metric'], G, 'weight', make_normal)

            if '>calc_error<' in calc_metric.keys():
                if req_params['metric'] == 'pagerank':
                    # Raise custom error code - calculation did not converge
                    raise_error('Pagerank did not converge', 557)
                else:
                    raise_error('General calculation error', 557)
        else:
            raise_error('No connections found matching the criteria', 416)

    # Build the dictionary to return
    data_results = {}
    # Append the metric data
    data_results['metrics'] = calc_metric

    # To the log
    #TODO app.logger.debug('A value for debugging')
    #TODO Log the stats

    return jsonify(result=data_results)
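
# Hypothetical sketch of the `get_params` helper the refactor above relies on,
# mirroring the inline loops from the earlier version; the project's real
# helper presumably also calls raise_error() when a required parameter is
# missing, which this sketch omits.
def get_params(request, param_list):
    """Unquote each listed query argument; absent entries become None."""
    params = {}
    for entry in param_list:
        if request.args.get(entry) is not None:
            params[entry] = urllib2.unquote(request.args.get(entry)).replace("'", '')
        else:
            params[entry] = None
    return params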
def centrality():
    start_time = datetime.now()
    #TODO add config file read
    #TODO support cross network calculations (author_node --is--> author_node)

    ## >Get the REQUIRED parameters
    req_params = {}
    for entry in req_param_list:
        if request.args.get(entry) is not None:
            req_params[entry] = urllib2.unquote(request.args.get(entry)).replace('\'', '')
        else:
            ret_string = {'error': 'Required parameter missing: ' + entry}
            inf_sup.append_to_log(log_filename, str(ret_string))
            return jsonify(ret_string)

    #TODO Validate start_date, end_date
    ## >Verify the metric is valid
    if req_params['metric'] not in metric_list:
        ret_string = {'error': 'Invalid metric requested'}
        inf_sup.append_to_log(log_filename, str(ret_string))
        return jsonify(ret_string)

    ## >Verify the start date is before the end date
    if int(req_params['start_date']) > int(req_params['end_date']):
        ret_string = {'error': 'End date before start date'}
        inf_sup.append_to_log(log_filename, str(ret_string))
        return jsonify(ret_string)

    ## >Get the OPTIONAL parameters
    opt_params = {}
    for entry in opt_param_list:
        if request.args.get(entry) is not None:
            opt_params[entry] = urllib2.unquote(request.args.get(entry)).replace('\'', '')
        else:
            opt_params[entry] = None

    #TODO validate the optional parameters
    ## >Get the FORMAT parameters
    for_params = {}
    for entry in format_param_list:
        if request.args.get(entry) is not None:
            for_params[entry] = urllib2.unquote(request.args.get(entry)).replace('\'', '')
        else:
            for_params[entry] = None

    params = dict(req_params.items() + opt_params.items() + for_params.items())

    ## >Build the mongo query
    mongo_query = {}
    mongo_query['PostDate'] = {'$gte': params['start_date'], '$lte': params['end_date']}
    mongo_query['Network'] = params['network']
    for param, value in opt_params.iteritems():
        if value is not None:
            if param == 'type':
                mongo_query['Type'] = opt_params['type']
            if param == 'twit_collect':
                mongo_query['Meta.sources'] = {'$in': [opt_params['twit_collect']]}
            if param == 'matched_project':
                mongo_query['Matching'] = {'$elemMatch': {'ProjectId': opt_params['matched_project']}}
            if param == 'matched_topic':
                #TODO
                pass
            if param == 'scored_project':
                #TODO
                pass
            if param == 'scored_topic':
                #TODO
                pass

    ## >Check if there are any matches
    if author_collection.find(mongo_query).count() == 0:
        ret_string = {'error': 'No connections found matching the criteria'}
        inf_sup.append_to_log(log_filename, str(ret_string))
        return jsonify(ret_string)
    else:
        ## >Map/reduce the A-->A connections
        a2a_map = Code("""
            function () {
                emit({"author": this.Author, "connection": this.Connection},
                     {"count": 1});
            }
            """)
        a2a_reduce = Code("""
            function (key, values) {
                var count = 0;
                values.forEach(function(v) {
                    count += v['count'];
                });
                return {"count": count};
            }
            """)
        a2a_result = author_collection.map_reduce(a2a_map, a2a_reduce, "a2a_results", query=mongo_query).find()

        ## >Build the author list
        author_list = []
        for a2a_count in a2a_result:
            ## >Escape ampersands for the XML/GraphML output
            con_author = a2a_count['_id']['author'].replace('&', '&amp;')
            con_connect = a2a_count['_id']['connection'].replace('&', '&amp;')
            if (len(con_author) > 0) and (len(con_connect) > 0):
                author_list.append((con_author, con_connect, int(a2a_count['value']['count'])))

        ## >Influence Calculations
        if len(author_list) > 0:
            ## >Create a blank graph
            G = nx.DiGraph()
            ## >Add the edges to the graph
            G.add_weighted_edges_from(author_list)
            ## >Run the requested metric on the graph 'G'
            try:
                calc_metric, stats = inf.run_metric(params['metric'], G, 'weight', True)
            except:
                try:
                    if params['metric'] == 'pagerank':
                        calc_metric, stats = inf.run_metric('pagerank_norm', G, 'weight', True)
                    else:
                        return jsonify({'error': 'Error calculating metric'})
                except:
                    return jsonify({'error': 'Pagerank did not converge'})
        else:
            ret_string = {'error': 'No connections found matching the criteria'}
            inf_sup.append_to_log(log_filename, str(ret_string))
            return jsonify(ret_string)

        ## >Build the dictionary to return
        data_results = {}
        ## >Append the metric data
        data_results['metrics'] = calc_metric

        ## >If graph requested
        if for_params['return_graph'] is not None:
            if for_params['return_graph'].lower() == 'true':
                ## >If format = data
                if for_params['format'] is None:
                    ## >Append the graph data
                    data_results['graph'] = nx.to_edgelist(G, nodelist=None)
                ## >If format = graphml
                elif for_params['format'].lower() == 'graphml':
                    ## >Create the graphml filename
                    graphml_name = inf_sup.create_filename(params)
                    ## >Get the graphml data
                    graphml_data = '\n'.join(nx.generate_graphml(G))
                    ## >Add the versioning
                    graphml_final = '<?xml version="1.0" encoding="UTF-8"?>' + "\n"
                    h = HTMLParser.HTMLParser()
                    for line in graphml_data.split("\n"):
                        ## >Unescape the html content
                        line = h.unescape(line)
                        ## >For each node add appropriate metric data into the graphml
                        if '<node id="' in line:
                            graphml_final += (line.replace('/>', '>') + "\n")
                            node_name = line.partition('"')[-1].rpartition('"')[0]
                            graphml_final += '      <data key="d1">' + str(calc_metric[node_name]) + '</data>' + "\n"
                            graphml_final += '    </node>' + "\n"
                        else:
                            graphml_final += line + "\n"
                        ## >Add the key for the metric attribute
                        if '<key' in line:
                            graphml_final += '  <key attr.name="' + params['metric'] + '" attr.type="float" for="node" id="d1" />'

                    if app.debug is True:
                        ## >Write out the graphml for testing
                        graphml_name = inf_sup.create_filename(params)
                        with open(graphml_name, 'w') as output_file:
                            for line in graphml_final:
                                output_file.write(line.encode('utf-8'))
                            if not output_file.closed:
                                output_file.close()

                    ## >Create the appropriate response to return the graphml
                    response = make_response(graphml_final)
                    response.headers["Content-Type"] = 'text/xml'
                    response.headers["Content-Disposition"] = 'attachment; filename=%s' % (graphml_name,)
                    return response

        ## >To the log
        statistics = {}
        statistics['api_query'] = params
        statistics['mongo_query'] = mongo_query
        statistics['influence_metric'] = params['metric']
        statistics['metric_runtime'] = stats
        statistics['full_runtime'] = str(datetime.now() - start_time)
        statistics['graph_nodes'] = G.order()
        statistics['graph_edges'] = G.size()
        inf_sup.append_to_log(log_filename, str(statistics))

        if app.debug is True:
            ### >Write out the influence for testing
            graphml_name = inf_sup.create_filename(params)
            influence_file = graphml_name.replace('.graphml', '.txt')
            with open(influence_file, 'w') as output_file:
                graph_list = calc_metric.items()
                for item in graph_list:
                    output_file.write(item[0].encode('utf_8') + "," + str(item[1]) + '\n')
                if not output_file.closed:
                    output_file.close()

        return jsonify(result=data_results)
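
# An alternative to the map/reduce used above (a sketch under stated
# assumptions, not the project's code): MongoDB's aggregation framework can
# produce the same per-(author, connection) counts without writing to and
# dropping a scratch collection. Note that with PyMongo >= 3 aggregate()
# returns a cursor of documents; on PyMongo 2.x it returns a command result
# dict, and the documents live under result['result'].
pipeline = [
    {'$match': mongo_query},
    {'$group': {'_id': {'author': '$Author', 'connection': '$Connection'},
                'count': {'$sum': 1}}},
]
for doc in author_collection.aggregate(pipeline):
    print doc['_id']['author'], doc['_id']['connection'], doc['count']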