Example #1
def calculate_network_metrics(nodes_map, posts_map, comments_map, network, timestep_size, timestep_window, timestep_count):
    # Parameters    
    timestep, timesteps_range = calculate_timestamp_range(network, timestep_size, timestep_window, timestep_count)
    
    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)    
    logging.info("network built")  

    # calculate the metrics
    network['metrics'] = compute_all_metrics(nodes_map, posts_map, comments_map, directed_multiedge_network, timesteps_range, timestep, timestep_window)

    logging.info("network metrics done")
    return directed_multiedge_network
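A minimal usage sketch for this helper, assuming nodes_map, posts_map and comments_map have already been produced by eu.extract.normalized_data and that extract_edges is importable, as in the later examples; the weekly timestep values mirror the defaults in parse() further down:
import logging

logging.basicConfig(level=logging.INFO)

# hypothetical inputs; in the examples below they come from
# eu.extract.normalized_data
network = {}
network['edges'] = extract_edges(nodes_map, comments_map)
network['nodes'] = [v for v in nodes_map.values() if v['active']]

directed_multiedge_network = calculate_network_metrics(
    nodes_map, posts_map, comments_map, network,
    60 * 60 * 24 * 7,  # timestep_size: one week, the default in parse()
    1,                 # timestep_window
    20)                # timestep_count
# network['metrics'] is now populated as a side effect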
Example #2
def calculate_network_metrics(nodes_map, posts_map, comments_map, network,
                              timestep_size, timestep_window, timestep_count):
    # Parameters
    timestep, timesteps_range = calculate_timestamp_range(
        network, timestep_size, timestep_window, timestep_count)

    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)
    logging.info("network built")

    # calculate the metrics
    network['metrics'] = compute_all_metrics(nodes_map, posts_map,
                                             comments_map,
                                             directed_multiedge_network,
                                             timesteps_range, timestep,
                                             timestep_window)

    logging.info("network metrics done")
    return directed_multiedge_network
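Example #3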
def main():
    (users_resource, nodes_resource, comments_resource, node_title_field,
     timestep_size, timestep_window, timestep_count, username, password,
     extraction_method, admin_roles, exclude_isolated, dumpto,
     destination_path, create_datapackage, datapackage_title, license_type,
     license_url, site_url) = parse_options(sys.argv[1:])
    
    generated = datetime.now()
    
    logging.info("Network processing - started")
    
    # Load the files
    allusers, allnodes, allcomments = load_files(users_resource, nodes_resource, comments_resource, username, password, extraction_method, dumpto, generated)
    
    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(allusers, allnodes, allcomments, node_title_field, admin_roles, exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard)
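    # note: "%s" is a platform-specific strftime extension (epoch seconds);
    # time.mktime(generated.timetuple()) is the portable equivalent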
    network['meta']['generated'] = int(generated.strftime("%s"))
        
    network['edges'] = extract_edges(nodes_map, comments_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [ v for v in nodes_map.values() if not v['active'] ]
    logging.info("inactive nodes: %(n)i" % {'n':len(inactive_nodes)})
    network['nodes'] = [ v for v in nodes_map.values() if v['active'] ]
    
    # Parameters    
    timestep, timesteps_range = calculate_timestamp_range(network, timestep_size, timestep_window, timestep_count)
    
    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)
    logging.info("network built")  

    # calculate the metrics
    network['metrics'] = compute_all_metrics(nodes_map, posts_map, comments_map, directed_multiedge_network, timesteps_range, timestep, timestep_window)
    logging.info("network metrics done")  
    
    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    tagged_dir = os.path.join(destination_path, 'data', tag)

    # dump the network to a json file, minified
    eu.resource.save(network, 'network.min.json', tagged_dir)
    logging.info("network dumped")  
    
    # create the datapackage
    if create_datapackage:
        try:
            # load the datapackage template
            basepath = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))        
            with open(os.path.join(basepath, "datapackage_template.json"), 'r') as datafile:
                datapackage = json.load(datafile)
                datapackage['license'] = {'type': license_type, 'url': license_url}
                if datapackage_title:
                    datapackage['title'] = datapackage_title
                datapackage['last_updated'] = generated.strftime('%Y-%m-%dT%H:%M:%S')
                datapackage['resources'][0]['url'] = site_url
                datapackage['resources'][0]['path'] = os.path.join('data', tag, 'network.gexf')

                # dump the gexf file
                gexf_file = os.path.join(tagged_dir, 'network.gexf')
                eu.gexf.save_gexf(directed_multiedge_network, gexf_file)
                # dump the datapackage
                eu.resource.save(datapackage, 'datapackage.json', destination_path, True)
                logging.info("datapackage saved")  
        except Exception as e:
            logging.error(e)
            logging.error("Error creating the datapackage")
            create_datapackage = False
    
    eu.resource.save({'last': tag, 'datapackage': create_datapackage}, 'last.json', destination_path)
    
    logging.info("Completed")  
def parse():
    node_title_field = 'uid'
    timestep_size = 60 * 60 * 24 * 7
    timestep_window = 1
    timestep_count = 20
    username = None
    password = None
    extraction_method = 'nested'
    admin_roles = set()
    exclude_isolated = False
    generated = datetime.now()

    source_json = request.form.get('source')
    if not source_json:
        raise InvalidUsage('Missing parameters', status_code=400)

    # Download the remote URL
    users, nodes, comments = parse_cif(source_json, 'simple')

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        users, nodes, comments, node_title_field, admin_roles,
        exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard)
    network['meta']['generated'] = int(generated.strftime("%s"))

    network['edges'] = extract_edges(nodes_map, comments_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    # Parameters
    timestep, timesteps_range = calculate_timestamp_range(
        network, timestep_size, timestep_window, timestep_count)

    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)
    logging.info("network built")

    # calculate the metrics
    network['metrics'] = compute_all_metrics(nodes_map, posts_map,
                                             comments_map,
                                             directed_multiedge_network,
                                             timesteps_range, timestep,
                                             timestep_window)
    logging.info("network metrics done")

    # save the results
    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    destination_path = os.path.abspath(os.path.join(static_path, "json"))
    tagged_dir = os.path.join(destination_path, "data", tag)

    # dump the network to a json file, minified
    eu.resource.save(network, 'network.min.json', tagged_dir)
    logging.info("network dumped")

    # dump the gexf file
    gexf_file = os.path.join(tagged_dir, 'network.gexf')
    eu.gexf.save_gexf(directed_multiedge_network, gexf_file)

    # return the result URL
    base_path = os.path.join("/json/data", tag)
    result_path = os.path.join(base_path, "network.min.json")

    logging.info("Completed: %(s)s" % {'s': result_path})
    return jsonify({
        'last': tag,
        'base_path': base_path,
        'metrics': 'network.min.json',
        'gexf': 'network.gexf'
    })
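parse() reads the POSTed source from Flask's request object and returns a jsonify(...) response, so it has to run inside a Flask request context. A minimal wiring sketch follows, with a route path chosen purely for illustration; the real route and the InvalidUsage and static_path definitions are not shown in the example:
from flask import Flask

app = Flask(__name__)

# route path is an assumption made for this sketch
app.add_url_rule('/parse', 'parse', parse, methods=['POST'])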
Example #5
def main():
    (users_resource, nodes_resource, comments_resource, node_title_field,
     timestep_size, timestep_window, timestep_count, username, password,
     extraction_method, admin_roles, exclude_isolated, dumpto,
     destination_path, create_datapackage, datapackage_title, license_type,
     license_url, site_url) = parse_options(sys.argv[1:])

    generated = datetime.now()

    logging.info("Network processing - started")

    # Load the files
    allusers, allnodes, allcomments = load_files(users_resource,
                                                 nodes_resource,
                                                 comments_resource, username,
                                                 password, extraction_method,
                                                 dumpto, generated)

    # extract a normalized set of data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        allusers, allnodes, allcomments, node_title_field, admin_roles,
        exclude_isolated)

    # this is the network object
    # going forward it should be read from a serialized format to handle caching
    network = {}

    # Add some file metadata
    network['meta'] = {}
    # Timestamp of the file generation (to show in the dashboard)
    network['meta']['generated'] = int(generated.strftime("%s"))

    network['edges'] = extract_edges(nodes_map, comments_map)

    # filter out nodes that have not participated in the conversations
    inactive_nodes = [v for v in nodes_map.values() if not v['active']]
    logging.info("inactive nodes: %(n)i" % {'n': len(inactive_nodes)})
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    # Parameters
    timestep, timesteps_range = calculate_timestamp_range(
        network, timestep_size, timestep_window, timestep_count)

    # build the whole network to use for metrics
    directed_multiedge_network = build_network(network)
    logging.info("network built")

    # calculate the metrics
    network['metrics'] = compute_all_metrics(nodes_map, posts_map,
                                             comments_map,
                                             directed_multiedge_network,
                                             timesteps_range, timestep,
                                             timestep_window)
    logging.info("network metrics done")

    tag = generated.strftime('%Y-%m-%d-%H-%M-%S')
    tagged_dir = os.path.join(destination_path, 'data', tag)

    # dump the network to a json file, minified
    eu.resource.save(network, 'network.min.json', tagged_dir)
    logging.info("network dumped")

    # create the datapackage
    if create_datapackage:
        try:
            # load the datapackage template
            basepath = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            with open(
                    os.path.join(basepath, "utils/datapackage_template.json"),
                    'r') as datafile:
                datapackage = json.load(datafile)
                datapackage['license'] = {
                    'type': license_type,
                    'url': license_url
                }
                if datapackage_title:
                    datapackage['title'] = datapackage_title
                datapackage['last_updated'] = generated.strftime(
                    '%Y-%m-%dT%H:%M:%S')
                datapackage['resources'][0]['url'] = site_url
                datapackage['resources'][0]['path'] = os.path.join(
                    'data', tag, 'network.gexf')

                # dump the gexf file
                gexf_file = os.path.join(tagged_dir, 'network.gexf')
                eu.gexf.save_gexf(directed_multiedge_network, gexf_file)
                # dump the datapackage
                eu.resource.save(datapackage, 'datapackage.json',
                                 destination_path, True)
                logging.info("datapackage saved")
        except Exception as e:
            logging.error(e)
            logging.error("Error reading the datapackage template")
            create_datapackage = False
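main() and parse() repeat the same normalize-then-measure pipeline. The sketch below factors out the shared steps, using only helpers that appear in the examples above; this refactoring is our illustration, not code from the project:
import time

def build_network_with_metrics(users, nodes, comments, node_title_field,
                               admin_roles, exclude_isolated, timestep_size,
                               timestep_window, timestep_count, generated):
    # normalize the raw data
    nodes_map, posts_map, comments_map = eu.extract.normalized_data(
        users, nodes, comments, node_title_field, admin_roles,
        exclude_isolated)

    # assemble the network object, keeping only active nodes;
    # time.mktime is a portable replacement for generated.strftime("%s")
    network = {
        'meta': {'generated': int(time.mktime(generated.timetuple()))},
        'edges': extract_edges(nodes_map, comments_map),
    }
    network['nodes'] = [v for v in nodes_map.values() if v['active']]

    # compute the metrics via the function from Examples #1 and #2;
    # network['metrics'] is filled in as a side effect
    directed_multiedge_network = calculate_network_metrics(
        nodes_map, posts_map, comments_map, network,
        timestep_size, timestep_window, timestep_count)
    return network, directed_multiedge_network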