import json
import logging
import os
import sys
from datetime import datetime

from flask import jsonify, request  # used by the parse() view below

# `eu` and the helpers used below (parse_options, load_files, extract_edges,
# calculate_timestamp_range, build_network, compute_all_metrics, parse_cif,
# static_path) are provided elsewhere in this codebase; the exact import path
# for the utils package is an assumption.
import edgesense.utils as eu


def calculate_network_metrics(nodes_map, posts_map, comments_map, network,
                              timestep_size, timestep_window, timestep_count):
    # Parameters
    timestep, timesteps_range = calculate_timestamp_range(
        network, timestep_size, timestep_window, timestep_count)

    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)
    logging.info("network built")

    # calculate the metrics
    network['metrics'] = compute_all_metrics(
        nodes_map, posts_map, comments_map, directed_multiedge_network,
        timesteps_range, timestep, timestep_window)
    logging.info("network metrics done")

    return directed_multiedge_network
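# For orientation, a minimal sketch of the `network` dict that
# calculate_network_metrics() operates on, inferred from the code in this
# file: 'meta', 'nodes' and 'edges' are the keys actually set by the callers
# below; the per-node and per-edge fields shown here (other than 'active')
# are illustrative assumptions, not the project's real schema.
example_network = {
    'meta': {'generated': 1500000000},          # epoch seconds
    'nodes': [{'id': 'u1', 'active': True}],    # only active nodes are kept
    'edges': [{'source': 'u1', 'target': 'u2',  # hypothetical edge fields
               'ts': 1500000000}],
}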
def main():
    users_resource, \
        nodes_resource, \
        comments_resource, \
        node_title_field, \
        timestep_size, \
        timestep_window, \
        timestep_count, \
        username, \
        password, \
        extraction_method, \
        admin_roles, \
        exclude_isolated, \
        dumpto, \
        destination_path, \
        create_datapackage, \
        datapackage_title, \
        license_type, \
        license_url, \
        site_url = parse_options(sys.argv[1:])

    generated = datetime.now()

    logging.info("Network processing - started")

    # Load the files
    allusers, allnodes, allcomments = load_files(
        users_resource, nodes_resource, comments_resource, username,
        password, extraction_method, dumpto, generated)

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers, allnodes, allcomments, node_title_field, admin_roles,
        exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard);
    # datetime.timestamp() is portable, unlike strftime("%s")
    network['meta']['generated'] = int(generated.timestamp())

    network['edges'] = extract_edges(nodes_map, comments_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    # build the network and compute the metrics (see the helper above)
    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network, timestep_size,
        timestep_window, timestep_count)

    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    tagged_dir = os.path.join(destination_path, 'data', tag)

    # dump the network to a json file, minified
    eu.resource.save(network, 'network.min.json', tagged_dir)
    logging.info("network dumped")

    # create the datapackage
    if create_datapackage:
        try:
            # load the datapackage template
            basepath = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            with open(
                    os.path.join(basepath, "utils",
                                 "datapackage_template.json"),
                    'r') as datafile:
                datapackage = json.load(datafile)
            datapackage['license'] = {
                'type': license_type,
                'url': license_url
            }
            if datapackage_title:
                datapackage['title'] = datapackage_title
            datapackage['last_updated'] = generated.strftime(
                '%Y-%m-%dT%H:%M:%S')
            datapackage['resources'][0]['url'] = site_url
            datapackage['resources'][0]['path'] = os.path.join(
                'data', tag, 'network.gexf')

            # dump the gexf file
            gexf_file = os.path.join(tagged_dir, 'network.gexf')
            eu.gexf.save_gexf(directed_multiedge_network, gexf_file)

            # dump the datapackage
            eu.resource.save(datapackage, 'datapackage.json',
                             destination_path, True)
            logging.info("datapackage saved")
        except Exception as e:
            logging.error(e)
            logging.error("Error reading the datapackage template")
            create_datapackage = False

    eu.resource.save({'last': tag, 'datapackage': create_datapackage},
                     'last.json', destination_path)
    logging.info("Completed")
def parse():
    node_title_field = 'uid'
    timestep_size = 60 * 60 * 24 * 7
    timestep_window = 1
    timestep_count = 20
    username = None
    password = None
    extraction_method = 'nested'
    admin_roles = set()
    exclude_isolated = False

    generated = datetime.now()

    # dict.has_key() is gone in Python 3; .get() covers both cases
    source_json = request.form.get('source')
    if not source_json:
        raise InvalidUsage('Missing parameters', status_code=400)

    # Download the remote URL
    users, nodes, comments = parse_cif(source_json, 'simple')

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        users, nodes, comments, node_title_field, admin_roles,
        exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard)
    network['meta']['generated'] = int(generated.timestamp())

    network['edges'] = extract_edges(nodes_map, comments_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    # build the network and compute the metrics (see the helper above)
    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network, timestep_size,
        timestep_window, timestep_count)

    # save the results
    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    destination_path = os.path.abspath(os.path.join(static_path, "json"))
    tagged_dir = os.path.join(destination_path, "data", tag)

    # dump the network to a json file, minified
    eu.resource.save(network, 'network.min.json', tagged_dir)
    logging.info("network dumped")

    # dump the gexf file
    gexf_file = os.path.join(tagged_dir, 'network.gexf')
    eu.gexf.save_gexf(directed_multiedge_network, gexf_file)

    # return the result URL
    base_path = os.path.join("/json/data", tag)
    result_path = os.path.join(base_path, "network.min.json")
    logging.info("Completed: %(s)s" % {'s': result_path})
    return jsonify({
        'last': tag,
        'base_path': base_path,
        'metrics': 'network.min.json',
        'gexf': 'network.gexf'
    })
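# A hedged sketch of how parse() might be wired into a Flask app. The app
# setup, the URL path and the InvalidUsage class are assumptions: InvalidUsage
# is raised above but not defined in this file, so the definition below
# follows the standard error-handling pattern from the Flask documentation.
from flask import Flask

app = Flask(__name__)


class InvalidUsage(Exception):
    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        super().__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv


@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
    # turn the exception into a JSON error response with the right status
    response = jsonify(error.to_dict())
    response.status_code = error.status_code
    return response


# expose parse() as a POST endpoint (the URL path is an assumption)
app.add_url_rule('/parse', 'parse', parse, methods=['POST'])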
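# Minimal entry point, assuming this module is run directly as the CLI build
# script; the logging configuration here is an assumption, not part of the
# original code. For the HTTP service, run the Flask app instead (app.run()).
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()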