Example #1
def make_recommender_data(G, window, tag_edges):
    """Make the GEXF used for recommending stories

    This takes the GEXF file containing every interaction over
    time and generates the 'recommenderdata.gexf' file, which
    is used by 'pagerank.py' to determine stories that should be
    recommended to users 
    
    It also makes the 'neglected_nodes' list, consisting of nodes
    with a degree < 2 and age < 50 days 
    
    :param G: NetworkX graph of all interactions across time 
    :param window: A 2-tuple containing the start and end dates
                   of the graph actions
    :param tag_edges: List of NetworkX edges to tag nodes
    
    """
    print('making recommender data')
    nodeiter = G.nodes(data=True)
    neglected_nodes = []

    #Temporarily drop tag edges so they don't count towards node degree
    G.remove_edges_from(tag_edges)

    for (n, c) in nodeiter:
        del c['nodemeta']
        del c['tags']
        #'neglected_nodes' = new stories with little interaction
        if G.degree[n] < 2 and c['type'] == 'story' and 'create_story' in c:
            created = cf.to_date(c['create_story'][0][1])
            if (datetime.now() - created).days < 50:
                neglected_nodes.append(n)

    edgeiter = G.edges(data=True)
    G.add_edges_from(tag_edges)

    for (u, v, c) in edgeiter:
        if 'edgemeta' in c:
            del c['edgemeta']
        #Find most recent activation of this edge (used for PageRank)
        updated = cf.to_date(c['spells'][0][1])
        for spell in c['spells']:
            if cf.to_date(spell[1]) > updated:
                updated = cf.to_date(spell[1])
        c['last_date'] = cf.to_str(updated)

    #create the 'recommenderdata.gexf' file for use by pagerank.py
    nx.write_gexf(G, "newdata.gexf")
    tree = ET.parse("newdata.gexf")
    root = tree.getroot()
    root[0].set('neglected_nodes', ' '.join(neglected_nodes))
    root[0].set('start', cf.to_str(window[0]))
    root[0].set('end', cf.to_str(window[1]))
    root[0].set('timeformat', 'date')
    tree.write(cf.RECOMMEND_FILE)
    os.remove("newdata.gexf")
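The snippet above leans on a small 'cf' helper module for date handling and output paths. A minimal sketch of what those helpers might look like, inferred only from the call sites here (the real commonfare module may differ), is:

# Hypothetical sketch of the 'cf' helpers used above; the real module may differ.
from datetime import datetime

DATE_FORMAT = "%Y-%m-%d"                  # assumed GEXF date format
RECOMMEND_FILE = "recommenderdata.gexf"   # assumed output path read by pagerank.py

def to_date(s):
    """Parse a GEXF date string into a datetime."""
    return datetime.strptime(s, DATE_FORMAT)

def to_str(d):
    """Serialise a datetime back into the GEXF date format."""
    return d.strftime(DATE_FORMAT)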
Example #2
def parse(*gexffile):
    """Entry method to begin parsing the GEXF file

    This is the method called through the Flask API to begin parsing the
    GEXF file of all commonfare.net interactions. Once the GEXF is in the
    correct format, it is passed to methods in the makegraphs.py module to
    output JSON data for visualisation purposes
    """
    if len(gexffile) == 0:  #Use the default (when running from Docker)
        filename = os.environ['GEXF_INPUT']
    else:
        filename = gexffile[0]
        print('filename is ', gexffile)

    ET.register_namespace("", "http://www.gexf.net/1.2draft")
    tree = ET.parse(filename)
    namespaces = {'xmlns': 'http://www.gexf.net/1.2draft'}
    root = tree.getroot()
    root[0].set('mode', 'dynamic')
    root[0].set('timeformat', 'date')

    #Add new ID and dynamic attributes
    (d, gexf_id) = addNewAttributes(root, namespaces)

    cleanNodes(root, namespaces, gexf_id)

    #Find edges to delete, earliest start and end dates of actions
    (edgestodelete, mindate, maxdate) = cleanEdges(root, namespaces, d)
    edges = root[0].find('xmlns:edges', namespaces)

    for e in edgestodelete:
        if e in edges:
            edges.remove(e)

    #Set date of first and last interaction in root tag of GEXF file
    root[0].set('start', cf.to_str(mindate))
    root[0].set('end', cf.to_str(maxdate))
    filename = os.path.splitext(filename)[0]
    parsedfilename = filename + "parsed.gexf"
    tree.write(parsedfilename)
    print('done parsing')

    #Now make the JSON graphs for visualisation
    makegraphs.init(parsedfilename)

    return jsonify({'success': True})
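The docstring states that parse() is invoked through a Flask API. A minimal sketch of how such a route could be wired up (the route path and app object below are assumptions, not taken from the source):

# Illustrative Flask wiring only; the real application module is not shown here.
from flask import Flask

app = Flask(__name__)

@app.route('/parse', methods=['POST'])
def parse_endpoint():
    # Delegates to the parse() function above, which returns jsonify({'success': True})
    return parse()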
Example #3
if __name__ == "__main__":   
    cur_date = datetime.datetime(2016,6,1)
    start_date = cur_date
    G = nx.Graph()
    counter = 0

    if not os.path.exists("../../data/input"):
        os.makedirs("../../data/input")

    #Loop for cf.DAYS days, doing cf.ACTIONS_PER_DAY actions each time
    while counter < cf.DAYS:
        cur_date = cur_date + datetime.timedelta(days=1)
        s_today = cf.to_str(cur_date)
        counter = counter + 1
        G = nx.convert_node_labels_to_integers(G)

        while len(G.nodes()) < cf.INITIAL_USERS:
            G = add_user()

        G = nx.convert_node_labels_to_integers(G)
        
        if len(G.nodes()) == cf.INITIAL_USERS:
            #Pick some nodes to be the 'colluders' in the simulation 
            cf.colluding_nodes = random.sample(list(G.nodes()), cf.NUM_COLLUDERS)
            
                #Seed some tags
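The loop above grows the graph with an add_user() helper that is not shown in this excerpt. A hedged sketch of what it might do, assuming a module-level graph G and a 'commoner' node type as used in the other examples:

# Hypothetical add_user(); the real simulation helper may set more attributes.
def add_user():
    """Add one 'commoner' node to the module-level graph G and return the graph."""
    new_id = len(G.nodes())
    G.add_node(new_id, type='commoner', tags=[], nodemeta=[])
    return G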
Example #4
def parse(gexffile):
    """Entry method to begin parsing the GEXF file

    This is the method called through the Flask API to begin parsing the
    GEXF file of all commonfare.net interactions. Once the GEXF is in the
    correct format, it is passed to methods in the makegraphs.py module to
    output JSON data for visualisation purposes
    """
    if gexffile is None:
        filename = os.environ['GEXF_INPUT']
    else:
        filename = gexffile

    #The ElementTree API has no way to grab the namespace directly
    #so this resorts to manual file IO to change it to 1.2draft
    newtext = (
        '<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2"' +
        ' xsi="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd">\n'
    )
    x = fileinput.FileInput(filename, inplace=1)
    for line in x:
        #Replace the initial line defining the namespaces
        if line.startswith("<gexf"):
            line = newtext
        #Remove visualisation meta-info
        elif re.search(r'<(\S*):', line) is not None:
            continue
        sys.stdout.write(line)
    x.close()

    tree = ET.parse(filename)
    root = tree.getroot()
    ET.register_namespace("", "http://www.gexf.net/1.2draft")
    namespaces = {'xmlns': "http://www.gexf.net/1.2draft"}

    #Remove the 'meta' tag, it's not necessary
    meta = root.find('xmlns:meta', namespaces)
    if meta is not None:
        root.remove(meta)
    graph = root.find('xmlns:graph', namespaces)
    graph.set('mode', 'dynamic')
    graph.set('timeformat', 'date')

    #First, find all existing attributes
    attributes = graph.findall('xmlns:attributes', namespaces)
    static_edge_attributes = []
    static_edge_parent_tag = None
    for x in attributes:
        #Then find attributes belonging to edges
        if x.attrib['class'] == 'edge':
            static_edge_parent_tag = x
            for att in x.findall('xmlns:attribute', namespaces):
                #They all need to be strings so that the initiator
                #node ID can be appended onto them later
                att.attrib['type'] = 'string'
                att.attrib['mode'] = 'dynamic'
                static_edge_attributes.append(att)

    #All static attributes become dynamic so remove this parent tag
    if static_edge_parent_tag is not None:
        graph.remove(static_edge_parent_tag)

    #Add a parent tag for dynamic edge attributes
    attrib = {'class': 'edge', 'mode': 'dynamic'}
    dynamic_edge_attributes = graph.makeelement('attributes', attrib)
    graph.insert(1, dynamic_edge_attributes)

    #First add all the old static edge attributes
    for x in static_edge_attributes:
        dynamic_edge_attributes.append(x)

    #Edge attribute that represents which node made an action
    attrib = {'id': 'init', 'type': 'string', 'title': 'initiator'}
    attr = dynamic_edge_attributes.makeelement('attribute', attrib)
    dynamic_edge_attributes.append(attr)

    #Correct formatting of nodes
    nodes = graph.find('xmlns:nodes', namespaces)
    for n in nodes:
        attvalues = n.findall('xmlns:attvalues', namespaces)
        #Need to merge multiple sets of attvalues
        if len(attvalues) > 1:
            for attval in attvalues[1]:
                attvalues[0].append(attval)
            n.remove(attvalues[1])
        #Convert timestamps to date strings for attvalues/spells
        if len(attvalues) > 0:
            for attval in attvalues[0]:
                updateTimestamps(attval, None)
        spells = n.find('xmlns:spells', namespaces)
        if spells is not None:
            for spell in spells:
                updateTimestamps(spell, None)

    edges = graph.find('xmlns:edges', namespaces)
    edgestodelete = []
    mindate = datetime(3333, 10, 1)
    maxdate = datetime(1, 1, 1)
    existingedges = {}

    #Walk every edge: merge duplicate/reverse edges, mark edges to delete
    #and track the overall date range
    for elem in edges:
        edgetype = None
        if 'label' in elem.attrib:
            label = elem.attrib['label']
        if 'id' in elem.attrib:
            edgeid = elem.attrib['id']
        source = elem.attrib['source']
        target = elem.attrib['target']
        if 'timestamp' in elem.attrib:
            timestamp = elem.attrib['timestamp']
        else:
            timestamp = None

        #Same as with nodes - merge multiple attvalues
        attvalues = elem.findall('xmlns:attvalues', namespaces)
        if len(attvalues) > 1:
            for attval in attvalues[1]:
                attvalues[0].append(attval)
            elem.remove(attvalues[1])
        if len(attvalues) > 0:
            for attval in attvalues[0]:
                updateTimestamps(attval, timestamp)
                #Append initiator node's ID to attvalue
                attval.attrib['value'] = (elem.attrib['source'] + "/" +
                                          attval.attrib['value'])

        #Delete self-looping edge and continue
        if source == target:
            edgestodelete.append(elem)
            continue

        edgeid = source + '-' + target
        altedgeid = target + '-' + source

        attvalues = elem.find('xmlns:attvalues', namespaces)

        #This is an edge not seen before
        if edgeid not in existingedges and altedgeid not in existingedges:
            #Add spells and attvalues if the edge doesn't have them
            if elem.find('xmlns:spells', namespaces) is None:
                spells = elem.makeelement('spells', {})
                elem.append(spells)
            else:
                spells = elem.find('xmlns:spells', namespaces)
            if attvalues is None:
                attvalues = elem.makeelement('attvalues', {})
                elem.append(attvalues)
        #This is an edge that existed in the reverse direction
        else:
            alt_edge_attvalues = attvalues
            old_edge_id = edgeid if edgeid in existingedges else altedgeid
            spells = existingedges[old_edge_id].find('xmlns:spells', namespaces)
            if spells is None:
                spells = existingedges[old_edge_id].find('spells')
            attvalues = existingedges[old_edge_id].find(
                'xmlns:attvalues', namespaces)
            if attvalues is None:
                attvalues = existingedges[old_edge_id].find('attvalues')
            if alt_edge_attvalues is not None:
                for att in alt_edge_attvalues:
                    attvalues.append(att)

        if edgeid not in existingedges and altedgeid not in existingedges:
            existingedges[edgeid] = elem
        else:  #Remove duplicate edges
            edgestodelete.append(elem)

        if timestamp is not None:  #Edge contains 'timestamp' attribute
            start = timestamp
            end = timestamp
        else:
            if len(spells) > 0:
                for spell in spells:
                    updateTimestamps(spell, None)
                    parseddate = cf.to_date(spell.attrib['start'])
                    if mindate > parseddate:
                        mindate = parseddate
                    if maxdate < parseddate:
                        maxdate = parseddate
                    attrib = {
                        'value': source,
                        'for': 'init',
                        'start': spell.attrib['start'],
                        'end': spell.attrib['end']
                    }
                    attvalue = attvalues.makeelement('attvalue', attrib)
                    attvalues.append(attvalue)
                    updateTimestamps(attvalue, timestamp)
            continue

        #Add the 'spell' of this action (start date and end date)
        attrib = {'start': start, 'end': end}
        spell = spells.makeelement('spell', attrib)
        #updateTimestamps(spell,timestamp)
        spells.append(spell)

        #Store more info in the 'attvalue' - initiator of action and its type
        if edgetype is not None:
            attrib = {
                'value': source + '-' + target,
                'for': edgetype,
                'start': start,
                'end': end
            }
            attvalue = attvalues.makeelement('attvalue', attrib)
            attvalues.append(attvalue)
        #else: #If we don't have different action types, we can at least store who instigated this action
        attrib = {'value': source, 'for': 'init', 'start': start, 'end': end}
        attvalue = attvalues.makeelement('attvalue', attrib)
        attvalues.append(attvalue)
        updateTimestamps(attvalue, timestamp)

        #Find the nodes connected by this edge and add info on the action

        updateTimestamps(spell, timestamp)
        attrs = {"start": spell.attrib['start'], "end": spell.attrib['end']}
        source = nodes.find("*/[@id='" + elem.attrib['source'] + "']")
        target = nodes.find("*/[@id='" + elem.attrib['target'] + "']")
        addNodeSpell(source, attrs)
        addNodeSpell(target, attrs)
        parseddate = cf.to_date(spell.attrib['start'])
        if mindate > parseddate:
            mindate = parseddate
        if maxdate < parseddate:
            maxdate = parseddate

    for e in edgestodelete:
        if e in edges:
            edges.remove(e)

    #Set date of first and last interaction in root tag of GEXF file
    #If mindate still holds its sentinel value, no dated spells were found (static network)
    if mindate != datetime(3333, 10, 1):
        graph.set('start', cf.to_str(mindate))
        graph.set('end', cf.to_str(maxdate))
    filename = os.path.splitext(filename)[0]
    parsedfilename = filename + "parsed.gexf"
    tree.write(parsedfilename)
    print('done parsing')

    #Now make the JSON graphs for visualisation
    makegraphs.init(parsedfilename, 'default.txt')

    return jsonify({'success': True})
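parse() repeatedly calls an updateTimestamps() helper to turn raw timestamps on spell/attvalue elements into date strings. Its implementation is not shown; a minimal sketch consistent with how it is called above (Unix timestamps and the date format are assumptions) would be:

# Hypothetical sketch of updateTimestamps(); the real helper may differ.
from datetime import datetime

def updateTimestamps(elem, timestamp):
    """Rewrite 'start'/'end' attributes as date strings, falling back to 'timestamp'."""
    for key in ('start', 'end'):
        value = elem.attrib.get(key, timestamp)
        if value is None:
            continue
        try:
            # Assume raw values are Unix timestamps (seconds)
            value = datetime.fromtimestamp(float(value)).strftime('%Y-%m-%d')
        except ValueError:
            pass  # value is already a formatted date string
        elem.attrib[key] = value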
Example #5
def make_graphs(G, window, index, communities, commoner_graphs):
    """
    Generate JSON for NetworkX graph. Update commoner graphs.
    
    This method generates all necessary information from a NetworkX
    graph representation and returns it in a JSON format. It also 
    updates the 'dynamic communities' and individual commoner graphs
    (using make_dynamic_communities and build_commoner_data methods)
    
    :param G: NetworkX graph of interactions in time window 
    :param window: 2-tuple containing start and end dates 
    :param index: integer representing time step
    :param communities: list holding NetworkX dynamic communities 
    (filled in by make_dynamic_communities method) 
    :param commoner_graphs: dictionary mapping each commoner node to
    its interaction history (filled in by build_commoner_data method)
    :returns: tuple containing:
              1. Updated dynamic communities
              2. Updated commoner_graphs
              3. JSON representation of NetworkX graph
    """
    edges_to_remove = []
    tag_edges = []
    tag_nodes = {}
    tag_counts = {}  #Holds counts of each of the tags
    cumulative = (index == 0)
    create_count = 0
    comment_count = 0
    convo_count = 0
    trans_count = 0

    if index > 0:
        graph_copy = copy.deepcopy(G)  #Deep copy so later iterations see an unmodified graph
    else:
        graph_copy = G
    nodeiter = G.nodes(data=True)
    edgeiter = G.edges(data=True)

    #Filter edges outside time window and add count stats
    for (u, v, c) in edgeiter:
        edge_exists = False

        for intervals in c['spells']:
            if (window[0] <= cf.to_date(intervals[0]) < window[1]):
                edge_exists = True

        if not edge_exists:
            edges_to_remove.append((u, v, c))
        else:
            #Find node that wrote story, add it to their 'nodemeta'
            if G.nodes[u]["type"] == "story":
                if "create_story" in G.nodes[u]:
                    G.nodes[v]['nodemeta'] = ['story']
            elif G.nodes[v]["type"] == "story":
                if "create_story" in G.nodes[v]:
                    G.nodes[u]['nodemeta'] = ['story']

            #Count how many different edge types there are
            if "create_story" in c:
                if cf.in_date(window, c["create_story"][0][1]):
                    create_count += 1
            if "comment_story" in c:
                for comment in c["comment_story"]:
                    if cf.in_date(window, comment[1]):
                        comment_count += 1
            if "conversation" in c:
                for convo in c["conversation"]:
                    if cf.in_date(window, convo[1]):
                        convo_count += 1
            if "transaction" in c:
                for trans in c["transaction"]:
                    if cf.in_date(window, trans[1]):
                        trans_count += 1

            #Special actions if the edge connects a node to a tag
            if G.nodes[u]["type"] == "tag" or G.nodes[v]["type"] == "tag":
                tag_edges.append((u, v, c))
                if G.nodes[u]["type"] == "tag":
                    tagname = G.nodes[u]["name"]
                else:
                    tagname = G.nodes[v]["name"]
                graph_copy.nodes[u]["tags"].append(tagname)
                graph_copy.nodes[v]["tags"].append(tagname)
                if tagname not in tag_counts:
                    tag_counts[tagname] = 0
                tag_counts[tagname] += 1

    #Remove non-existent edges
    graph_copy.remove_edges_from(edges_to_remove)

    #Also remove the tag edges so as not to influence the k-core calculation
    graph_copy.remove_edges_from(tag_edges)

    #Filter nodes outside the time window
    nodes_to_remove = []
    zero_nodes = []
    for (n, c) in nodeiter:
        graph_copy.nodes[n]['nodemeta'] = []

        node_exists = False
        graph_copy.nodes[n]['date'] = cf.to_str(window[0])
        c['date'] = cf.to_str(window[0])  #TODO: Do both lines need to be here?
        if 'spells' in c:
            for intervals in c['spells']:
                if cf.in_date(window, intervals[0]):
                    node_exists = True
        if not node_exists:
            nodes_to_remove.append(n)
            if c['type'] == 'commoner':
                zero_nodes.append((n, c))
    graph_copy.remove_nodes_from(nodes_to_remove)

    #Get rid of spells and actions that fall outside the window range
    filter_spells(graph_copy, window)

    #DO THE KCORE CALCULATIONS HERE
    colluders = dx.weighted_core(graph_copy, window)

    #Add the tags back in
    graph_copy.add_edges_from(tag_edges)

    nodeiter = graph_copy.nodes(data=True)
    for (n, c) in nodeiter:
        if c['type'] == 'tag':
            tag_nodes[n] = c

    #Recommender data is built from the cumulative graph
    if not cumulative:
        build_commoner_data(graph_copy, commoner_graphs, zero_nodes)
    else:
        print('making rec data HERE')
        make_recommender_data(copy.deepcopy(graph_copy), window, tag_edges)

    #Remove isolated nodes that exist after removing Basic Income
    graph_copy.remove_nodes_from(list(nx.isolates(graph_copy)))

    #Give each edge the weight of its primary direction
    #TODO: Why not take into account other direction?
    edgeiter = graph_copy.edges(data=True)
    for (u, v, c) in edgeiter:
        if 'edgeweight' in c:
            c['edgeweight'] = c['edgeweight'][u]
        else:
            c['edgeweight'] = 1

    #Now compare fronts to previous partitions
    if not cumulative:
        partition = make_dynamic_communities(graph_copy, communities, index)
    else:
        partition = community.best_partition(graph_copy, weight='edgeweight')

    #Simple counting of different node types
    c_count = 0
    s_count = 0
    t_count = 0
    l_count = 0
    nodeiter = graph_copy.nodes(data=True)
    for n, c in nodeiter:
        if c['type'] == 'commoner':
            c_count += 1
        elif c['type'] == 'story':
            s_count += 1
        elif c['type'] == 'tag':
            t_count += 1
        elif c['type'] == 'listing':
            l_count += 1
        c['cluster'] = partition[n]

    n_count = nx.number_of_nodes(graph_copy)
    e_count = nx.number_of_edges(graph_copy)
    core_graph_json = json_graph.node_link_data(graph_copy)
    # tags = sorted(iter(tag_counts.items()),reverse=True,key=lambda kv: (kv[1], kv[0]))
    tags = [(k, tag_counts[k])
            for k in sorted(tag_counts, key=tag_counts.get, reverse=True)]
    #Additional info about the graph itself
    meta_info = {
        'commoners': c_count,
        'stories': s_count,
        'listings': l_count,
        'tags': t_count,
        'create': create_count,
        'comment': comment_count,
        'convo': convo_count,
        'trans': trans_count,
        'nodenum': n_count,
        'edge_num': e_count,
        'tagcount': tags,
        'date': cf.to_str(window[1]),
        'colluders': colluders
    }
    core_graph_json.update(meta_info)
    return (communities, commoner_graphs, core_graph_json)
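make_graphs() relies on a cf.in_date() helper when counting actions. Judging from the call sites it tests whether a date string falls inside the current window; a minimal sketch under that assumption (date format assumed):

# Hypothetical sketch of cf.in_date(); format and boundary handling are assumptions.
from datetime import datetime

def in_date(window, date_str):
    """Return True if date_str falls inside the half-open window [start, end)."""
    start, end = window
    d = datetime.strptime(date_str, "%Y-%m-%d")
    return start <= d < end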
Example #6
import sys
from subprocess import Popen

def start_new_process(command_str):             # create new terminal window
  msg = config.to_str()                         # 'config' is a module-level object in the source
  new_window_command = "x-terminal-emulator -e".split()
  echo = [sys.executable, "-c", command_str]
  process = Popen(new_window_command + echo + [msg])
  return process
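A possible call site (the worker code below is illustrative, not from the source):

# Illustrative usage: spawn a terminal running a tiny worker that reads the
# config string passed as its only argument.
worker_code = "import sys; print('worker got config:', sys.argv[1])"
p = start_new_process(worker_code)
p.wait()   # block until the spawned process exits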
Example #7
def make_graphs(G,window,index,communities,commoner_graphs):
    """
    Generate JSON for NetworkX graph. Update commoner graphs.
    
    This method generates all necessary information from a NetworkX
    graph representation and returns it in a JSON format. It also 
    updates the 'dynamic communities' and individual commoner graphs
    (using make_dynamic_communities and build_commoner_data methods)
    
    :param G: NetworkX graph of interactions in time window 
    :param window: 2-tuple containing start and end dates 
    :param index: integer representing time step
    :param communities: list holding NetworkX dynamic communities 
    (filled in by make_dynamic_communities method) 
    :param commoner_graphs: dictionary mapping each commoner node to
    its interaction history (filled in by build_commoner_data method)
    :returns: tuple containing:
              1. Updated dynamic communities
              2. Updated commoner_graphs
              3. JSON representation of NetworkX graph
              4. Updated NetworkX graph 
    """
    edges_to_remove = []    
    tag_edges = []
    #tag_nodes = {}
    tag_counts = {} #Holds counts of each of the tags      
    cumulative = (index == 0)
    create_count = 0
    comment_count = 0
    convo_count = 0
    trans_count = 0
    
    graph_copy = copy.deepcopy(G) #Deep copy so later iterations see an unmodified graph
    nodeiter = G.nodes(data=True)
    edgeiter = G.edges(data=True)   

    #Filter edges outside time window and add count stats 
    for (u,v,c) in edgeiter:
        c['activations'] = []
        if window[0] is not None:
            edge_exists = False
            for intervals in c['spells']:
                if (window[0] <= cf.to_date(intervals[0]) < window[1]):
                    edge_exists = True
                    break
        else:
            edge_exists = True #Edge always exists in static network 
                     
        if not edge_exists:
            edges_to_remove.append((u,v,c))
        else:
            copy_edge = graph_copy.edges[u,v]
            if window[0] is not None:
                copy_edge['first_active'] = copy_edge['spells'][0][0]
                copy_edge['last_active'] = copy_edge['spells'][-1][0]
            del graph_copy.edges[u,v]['spells']

    #Remove non-existent edges
    graph_copy.remove_edges_from(edges_to_remove)
       
    #Also remove the tag edges so as not to influence the k-core calculation
    graph_copy.remove_edges_from(tag_edges)

 
    #Filter nodes outside the time window
    nodes_to_remove = []
    zero_nodes = []
    if window[0] is not None:
        for (n,c) in nodeiter:
            graph_copy.nodes[n]['nodemeta'] = []    
            graph_copy.nodes[n]['date'] = cf.to_str(window[0])
            c['date'] = cf.to_str(window[0]) #TODO: Do both lines need to be here?
            if nx.is_isolate(graph_copy,n):
                nodes_to_remove.append(n) 
                G.nodes[n]['binary_active'] += "0"
                graph_copy.nodes[n]['binary_active'] += "0"                
                if 'type' not in c or c['type'] == cf.user_type:
                    zero_nodes.append((n,c))
            else:
                G.nodes[n]['times_active'] += 1
                graph_copy.nodes[n]['times_active'] += 1
                G.nodes[n]['binary_active'] += "1"
                graph_copy.nodes[n]['binary_active'] += "1"
                
        graph_copy.remove_nodes_from(nodes_to_remove)

       
        #Get rid of spells and actions that fall outside the window range 
        graph_copy = filter_spells(graph_copy,window)


                
    #DO THE KCORE CALCULATIONS HERE
    (core_G,colluders) = dx.weighted_core(graph_copy.to_undirected(),window,cumulative)

    #Add the tags back in
    core_G.add_edges_from(tag_edges)

    to_remove = []
    nodeiter = core_G.nodes(data=True)
    for (n,c) in nodeiter:
        if cf.user_type != '' and 'type' not in c: #If there are meant to be types but we can't find any
            to_remove.append(n)
    core_G.remove_nodes_from(to_remove)
    
    #Recommender data is built from the cumulative graph 
    if not cumulative:
        build_commoner_data(core_G,commoner_graphs,zero_nodes)

    #Remove isolated nodes that exist after removing Basic Income 
    core_G.remove_nodes_from(list(nx.isolates(core_G)))

    #Now compare fronts to previous partitions   
    if not cumulative:
        partition = make_dynamic_communities(core_G,communities,index)
    else:
        undirectedGraph = core_G.to_undirected()
        partition = community.best_partition(undirectedGraph,weight='positivemax') 
    
        
    nodeiter = core_G.nodes(data=True)
    for n,c in nodeiter:
        c['cluster'] = partition[n]
        if cf.LABEL_KEY != "":
            c['label'] = c[cf.LABEL_KEY]
        else:
            c['label'] = str(n)
    core_graph_json = json_graph.node_link_data(core_G)

    if window[1] is not None:
        meta_info = {'date':cf.to_str(window[1]),'colluders':colluders}
        core_graph_json.update(meta_info)
    return (communities,commoner_graphs,core_graph_json,core_G)
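This version of make_graphs() appends one character per time window to each node's 'binary_active' string ('1' when the node had edges in that window, '0' otherwise). A small illustrative helper for reading that string back (not part of the source):

# Illustrative helper: list the window indices in which a node was active.
def active_windows(binary_active):
    return [i for i, flag in enumerate(binary_active) if flag == "1"]

# e.g. active_windows("01101") -> [1, 2, 4]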
Example #8
def make_all_graphs(G,startdate,enddate,spacing,filename):     
    """Generate all JSON files from NetworkX graph
    
    This method generates a JSON file for the cumulative
    graph of interactions, JSON files for interactions within
    each time window (determined by 'spacing' parameter) and 
    JSON files for each commoner that contains their interaction 
    history.
    Files are created in the 'data/output/' directory for each 
    'spacing' as follows:
    ---------------
    graphdata/*spacing*/0.json - JSON for all interaction data
    graphdata/*spacing*/1-X.json - JSON of interaction windows,
    with size dependent on the 'spacing' parameter
    userdata/1-X.json - JSON of each commoner's interaction
    history (filename corresponds to their commoner ID)

    :param G: NetworkX graph of all interactions across time 
    :param startdate: date of first interaction
    :param enddate: date of most recent interaction  
    :param spacing: string giving the size of each time window
                    ('hourly', 'daily', 'weekly', 'biweekly' or 'monthly';
                    anything else falls back to yearly windows)
    :param filename: name of the input file (only echoed for logging here)

    """
    print ('filename is ',filename)
    c_Gs = {}
    coms = []
    #Create dicts to hold the interaction data for each commoner    
    for (n,c) in G.nodes(data=True):

        if 'type' not in c or c['type'] == cf.user_type:
            c_Gs[n] = []
        c["tags"] = []   
        c["times_active"] = 0
        c["binary_active"] = ""
    #Dynamic data
    print ('spacing is ',spacing)
    
    if startdate is not None:
        if spacing == 'hourly':
            delta = relativedelta(minutes=-60)
        elif spacing == 'daily':
            delta = relativedelta(days=-1)
        elif spacing == 'weekly':
            delta = relativedelta(weeks=-1)
        elif spacing == 'biweekly':
            delta = relativedelta(weeks=-2)
        elif spacing == 'monthly':
            delta = relativedelta(months=-1)
        else:
            delta = relativedelta(years=-1)
        graph_dir = cf.GRAPHDIR
        user_dir = cf.USERDIR
        
        #Make dates for first interaction 'window'
        w_end = enddate
        w_start = w_end+delta
        
        index = 1
        if os.path.exists(graph_dir):
            shutil.rmtree(graph_dir)
        os.makedirs(graph_dir)
        #Makes the windowed graphs
        while w_end > startdate:
            print ('windowend is',cf.to_str(w_end))
            (coms,c_Gs,json_G,G_new) = make_graphs(G,(w_start,w_end),index,coms,c_Gs)    
            with open(graph_dir + str(index) + '.json', 'w') as outfile:
                outfile.write(json.dumps(json_G))
            w_end = w_start
            w_start = w_end + delta
            index += 1
        
        if os.path.exists(user_dir):
            shutil.rmtree(user_dir)
        #Make individual historic files for each commoner
        os.makedirs(user_dir)
        #if spacing == 'biweekly':
        for k,v in c_Gs.items():
            if len(v) > 0:
                with open(user_dir + str(k) + '.json', 'w') as outfile:
                    outfile.write(json.dumps(v))   
        
    else:
        graph_dir = cf.GRAPHDIR
        if os.path.exists(graph_dir):
            shutil.rmtree(graph_dir)
        os.makedirs(graph_dir)
        
    #Make cumulative graph
    (coms,c_Gs,json_G,G_new) = make_graphs(G,(startdate,enddate),0,coms,None)
    
    dynamic_communities = {}
    
    for i in coms:
        if len(i) > 2:
            k_high = 0
            #Give cluster a name based on its most influential node
            for nodes in i:
                if type(nodes) is list:
                    for nodeid in nodes:
                        n = G_new.nodes[nodeid]
                        k = n['kcore']
                        if k >= k_high: #and n['type'] == cf.user_type:
                            central_node = nodeid
                            k_high = k
            dynamic_communities[central_node + str(coms.index(i))] = i
    
    json_G['dynamic_comms'] = dynamic_communities   
    with open(graph_dir + '0.json', 'w') as outfile:
        json.dump(json_G, outfile)
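make_all_graphs() is presumably driven by makegraphs.init() once the GEXF has been parsed into a NetworkX graph. A hedged sketch of such a driver, assuming nx.read_gexf() preserves the 'start'/'end' attributes that parse() wrote and the date format used elsewhere:

# Hypothetical driver; function name, attribute handling and spacing are assumptions.
from datetime import datetime
import networkx as nx

def run_make_all_graphs(parsedfilename, spacing='biweekly'):
    G = nx.read_gexf(parsedfilename)
    start = G.graph.get('start')
    end = G.graph.get('end')
    # Fall back to a static network (no windows) if the dates were not preserved
    startdate = datetime.strptime(start, "%Y-%m-%d") if start else None
    enddate = datetime.strptime(end, "%Y-%m-%d") if end else None
    make_all_graphs(G, startdate, enddate, spacing, parsedfilename)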