Exemple #1
0
def generate_csv_file(command, root_path):
    csv_file = "id,listnum,num_items_listed,num_unique_items,num_cluster_switches,avg_cluster_size,num_intrusions,num_perseverations,avg_word_freq,avg_word_aoa\n"
    # parameters should come from snafu gui (ids, filename, category, scheme)
    # filedata = snafu.readX(ids, command['fullpath'], category=command['category'], spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"), group=group)
    data = snafu.readX('all',command['fullpath'],category=command['category'], scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes"), spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"), group=True)
    data.hierarchical()

    for subnum, sub in enumerate(data.subs):
        # this converts fluency list from ints to labels
        labeled_lists = snafu.numToItemLabel(data.Xs[subnum],data.items[subnum])
    
        for listnum in range(len(data.Xs[subnum])):
            csv_sub = sub
            csv_listnum = listnum
            csv_numitems = len(data.Xs[subnum][listnum])
            csv_uniqueitem = len(set(data.Xs[subnum][listnum]))
            
            # parameters should come from snafu gui (scheme, clustertype)
            clustersizes = snafu.clusterSize(labeled_lists[listnum], scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes"), clustertype=command['cluster_type'])
            csv_clusterswitch = len(clustersizes) - 1
            csv_clusterlength = snafu.avgClusterSize(clustersizes)

            # parameters should come from snafu gui (scheme)
            csv_intrusions = len(snafu.intrusions(labeled_lists[listnum],scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes")))
            csv_perseverations = len(snafu.perseverations(labeled_lists[listnum]))

            csv_freq, temp = snafu.getWordFreq([labeled_lists[listnum]],freq_sub=float(command['freq_sub']))
            csv_aoa, temp = snafu.getWordAoa([labeled_lists[listnum]])

            csv_file += str(csv_sub)+','+str(csv_listnum)+','+str(csv_numitems)+','+str(csv_uniqueitem)+','+str(csv_clusterswitch)+','+str(round(csv_clusterlength,2))+','+str(csv_intrusions)+','+str(csv_perseverations)+','+str(round(csv_freq,2))+','+str(round(csv_aoa,2))+'\n'

    return csv_file
Exemple #2
0
def data_properties(command, root_path):
    """Summarize fluency-list data for the snafu GUI.

    Computes per-subject (or per-group) statistics -- cluster sizes, cluster
    switches, intrusions, perseverations, word frequency and age-of-acquisition
    -- and returns them in a dict, together with a CSV rendering of the data.

    Parameters:
        command: dict containing a 'data_parameters' sub-dict (factor_type,
            subject/group, fullpath, category, spellfile, cluster_scheme,
            cluster_type, freq_sub, freqfile, aoafile).
        root_path: base directory used to resolve scheme/spellfile/norm labels.

    Returns:
        dict keyed for the GUI; "type" is "data_properties".

    Raises:
        ValueError: if factor_type is neither "subject" nor "group".
    """

    # turns array into string: "Mean (Std) [Min -- Max]"
    def format_output(x):
        x_mean = str(round(np.mean(x), 2))
        x_std = str(round(np.std(x), 2))
        x_min = str(round(np.min(x), 2))
        x_max = str(round(np.max(x), 2))
        return x_mean + " (" + x_std + ") [" + x_min + " -- " + x_max + "]"

    # percentage of listed words excluded from a norm lookup, e.g. "12.5%"
    def excluded_rate(excluded, total):
        count = 0
        for i in excluded:
            count += len(i)
        if total == 0:
            # no words listed at all; avoid ZeroDivisionError
            return "0.0%"
        return str(round(float(count) / total * 100, 2)) + '%'

    command = command['data_parameters']

    if command['factor_type'] == "subject":
        ids = str(command['subject'])
        group = False
    elif command['factor_type'] == "group":
        ids = str(command['group'])            # without str() causes unicode issues for "all" :(
        group = True
    else:
        # previously fell through and failed later with a NameError on 'ids'
        raise ValueError("unknown factor_type: " + str(command['factor_type']))

    filedata = snafu.readX(ids, command['fullpath'], category=command['category'], spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"), group=group)
    filedata.hierarchical()
    Xs = filedata.Xs
    items = filedata.items
    irts = filedata.irts
    numnodes = filedata.numnodes

    # initialize accumulators (one entry per subject)
    avg_cluster_size = []
    avg_num_cluster_switches = []
    num_lists = []
    avg_items_listed = []
    avg_unique_items_listed = []
    intrusions = []
    avg_num_intrusions = []
    perseverations = []
    avg_num_perseverations = []

    snafu.wordSetup(freq_sub=float(command['freq_sub']), freqfile=label_to_filepath(command['freqfile'], root_path, "frequency"), aoafile=label_to_filepath(command['aoafile'], root_path, "aoa"))
    total_words = 0            # fixed typo: was 'totol_words'
    avg_word_freq = []
    word_freq_excluded = []
    avg_word_aoa = []
    word_aoa_excluded = []

    # resolve the scheme path once; it is loop-invariant
    if command['cluster_scheme'] != "None":
        scheme_path = label_to_filepath(command['cluster_scheme'], root_path, "schemes")

    for subjnum in range(len(Xs)):
        Xs[subjnum] = snafu.numToAnimal(Xs[subjnum], items[subjnum])
        if command['cluster_scheme'] != "None":
            cluster_sizes = snafu.clusterSize(Xs[subjnum], scheme_path, clustertype=command['cluster_type'])
            avg_cluster_size.append(snafu.avgClusterSize(cluster_sizes))
            avg_num_cluster_switches.append(snafu.avgNumClusterSwitches(cluster_sizes))
            intrusions.append(snafu.intrusions(Xs[subjnum], scheme_path))
            avg_num_intrusions.append(snafu.avgNumIntrusions(intrusions[-1]))
            perseverations.append(snafu.perseverations(Xs[subjnum]))
            avg_num_perseverations.append(snafu.avgNumPerseverations(Xs[subjnum]))
        else:
            # no scheme selected: cluster/intrusion measures are undefined
            avg_cluster_size = ["n/a"]
            avg_num_cluster_switches = ["n/a"]
            avg_num_intrusions = ["n/a"]
            avg_num_perseverations = ["n/a"]
        num_lists.append(len(Xs[subjnum]))
        avg_items_listed.append(np.mean([len(i) for i in Xs[subjnum]]))
        avg_unique_items_listed.append(np.mean([len(set(i)) for i in Xs[subjnum]]))

        tmp1, tmp2 = snafu.getWordFreq(Xs[subjnum], freq_sub=float(command['freq_sub']))
        avg_word_freq.append(tmp1)
        word_freq_excluded.extend(tmp2)
        tmp1, tmp2 = snafu.getWordAoa(Xs[subjnum])
        avg_word_aoa.append(tmp1)
        word_aoa_excluded.extend(tmp2)
        for i in Xs[subjnum]:
            total_words += len(i)

    # clean up / format data to send back
    intrusions = snafu.flatten_list(intrusions)
    perseverations = snafu.flatten_list(perseverations)

    # with more than one subject, collapse per-subject lists into summary strings
    if len(Xs) > 1:
        if command['cluster_scheme'] != "None":
            avg_cluster_size = format_output(avg_cluster_size)
            avg_num_cluster_switches = format_output(avg_num_cluster_switches)
            avg_num_intrusions = format_output(avg_num_intrusions)
            avg_num_perseverations = format_output(avg_num_perseverations)
        num_lists = format_output(num_lists)
        avg_items_listed = format_output(avg_items_listed)
        avg_unique_items_listed = format_output(avg_unique_items_listed)
        avg_word_freq = format_output(avg_word_freq)
        avg_word_aoa = format_output(avg_word_aoa)

    word_freq_rate = excluded_rate(word_freq_excluded, total_words)
    word_aoa_rate = excluded_rate(word_aoa_excluded, total_words)
    csv_file = generate_csv_file(command, root_path)

    return { "type": "data_properties",
             "num_lists": num_lists,
             "avg_items_listed": avg_items_listed,
             "intrusions": intrusions,
             "perseverations": perseverations,
             "avg_num_intrusions": avg_num_intrusions,
             "avg_num_perseverations": avg_num_perseverations,
             "avg_unique_items_listed": avg_unique_items_listed,
             "avg_num_cluster_switches": avg_num_cluster_switches,
             "avg_cluster_size": avg_cluster_size,
             "avg_word_freq": avg_word_freq,
             "avg_word_aoa": avg_word_aoa,
             "word_freq_rate": word_freq_rate,
             "word_freq_excluded": word_freq_excluded,
             "word_aoa_rate": word_aoa_rate,
             "word_aoa_excluded": word_aoa_excluded,
             "csv_file": csv_file }
Exemple #3
0
def network_properties(command, root_path):
    """Estimate a semantic network from fluency data and report its properties.

    Parameters:
        command: dict with 'data_parameters' and 'network_parameters' sub-dicts.
        root_path: base directory used to resolve file labels.

    Returns:
        Whatever graph_properties() returns for the estimated network.

    Raises:
        ValueError: on an unknown factor_type or network_method.
    """
    subj_props = command['data_parameters']
    command = command['network_parameters']

    # U-INVITE won't work with perseverations, so strip them for that method only
    removePerseverations = (command['network_method'] == "U-INVITE")

    if subj_props['factor_type'] == "subject":
        ids = str(subj_props['subject'])
        group = False
    elif subj_props['factor_type'] == "group":
        ids = str(subj_props['group'])            # without str() causes unicode issues for "all" :(
        group = True
    else:
        # previously fell through and failed later with a NameError on 'ids'
        raise ValueError("unknown factor_type: " + str(subj_props['factor_type']))

    filedata = snafu.readX(ids, subj_props['fullpath'], category=subj_props['category'], spellfile=label_to_filepath(subj_props['spellfile'], root_path, "spellfiles"), removePerseverations=removePerseverations, group=group)
    filedata.nonhierarchical()
    Xs = filedata.Xs
    items = filedata.items
    irts = filedata.irts
    numnodes = filedata.numnodes

    toydata = snafu.DataModel({
            'numx': len(Xs),
            'trim': 1,
            'jump': float(command['jump_probability']),
            'jumptype': command['jump_type'],
            'priming': float(command['priming_probability']),
            'startX': command['first_item']})
    fitinfo = snafu.Fitinfo({
            'prior_method': "zeroinflatedbetabinomial",
            'prior_a': 1,
            'prior_b': 2,
            'zibb_p': 0.5,
            'startGraph': command['starting_graph'],
            'goni_size': int(command['goni_windowsize']),
            'goni_threshold': int(command['goni_threshold']),
            'followtype': "avg",
            'prune_limit': 100,
            'triangle_limit': 100,
            'other_limit': 100})

    # Default to no prior; previously an unrecognized value left 'prior' unbound.
    prior = None
    if command['prior'] == "USF":
        usf_file_path = "/snet/USF_animal_subset.snet"
        filename = root_path + usf_file_path

        usf_graph, usf_items = snafu.read_graph(filename)
        usf_numnodes = len(usf_items)
        priordict = snafu.genGraphPrior([usf_graph], [usf_items], fitinfo=fitinfo)
        prior = (priordict, usf_items)

    method = command['network_method']
    if method == "RW":
        bestgraph = snafu.noHidden(Xs, numnodes)
    elif method == "Goni":
        bestgraph = snafu.goni(Xs, numnodes, td=toydata, valid=0, fitinfo=fitinfo)
    elif method == "Chan":
        bestgraph = snafu.chan(Xs, numnodes)
    elif method == "Kenett":
        bestgraph = snafu.kenett(Xs, numnodes)
    elif method == "FirstEdge":
        bestgraph = snafu.firstEdge(Xs, numnodes)
    elif method == "U-INVITE":
        bestgraph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo, debug=False, prior=prior)
    else:
        # previously fell through and failed with a NameError on 'bestgraph'
        raise ValueError("unknown network_method: " + str(method))

    nxg = nx.to_networkx_graph(bestgraph)
    nxg_json = jsonGraph(nxg, items)

    return graph_properties(nxg, nxg_json)
Exemple #4
0
        'zibb_p': 0.5,
        'prior_a': 2,
        'prior_b': 1,
        'goni_size': 2,
        'goni_threshold': 2,
        'followtype': "avg", 
        'prune_limit': np.inf,
        'triangle_limit': np.inf,
        'other_limit': np.inf })

# Participants S101..S150 from the spring 2017 animal-fluency data set.
subs = ["S%d" % i for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read in data from file, flattening all participants together
Xs_flat, groupitems, irtdata, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=True,
    spellfile="spellfiles/zemla_spellfile.csv",
    flatten=True)

# read data from file, preserving hierarchical structure
Xs_hier, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=True,
    spellfile="spellfiles/zemla_spellfile.csv")

graphs = []
for method in methods:
    if method == "rw":
        # estimate the best network using a Naive Random Walk
        graph = snafu.nrw(Xs_flat, groupnumnodes)
    elif method == "goni":
        # estimate the best network using Goni
        graph = snafu.goni(Xs_flat, groupnumnodes, fitinfo=fitinfo)
        
# the hierarchical model will take a long time to run!! to test it you can fit a smaller number of participants, e.g. range(101,111)
# Fit one network per participant and per estimation method, writing results
# to individual_graphs.csv.
# NOTE(review): only the CSV header is written in the visible portion; edge
# rows and fo.close() presumably follow below this excerpt -- confirm.
subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

fo = open('individual_graphs.csv', 'w')
fo.write('subj,method,item1,item2,edge\n')

for method in methods:
    # add snafu.hierarhicalUinvite method here

    for sub in subs:
        # read a single participant's lists
        Xs, items, irtdata, numnodes = snafu.readX(
            sub,
            category,
            filepath,
            removePerseverations=True,
            spellfile="spellfiles/zemla_spellfile.csv")
        # dispatch on the requested estimation method
        if method == "rw":
            graph = snafu.nrw(Xs, numnodes)
        if method == "goni":
            graph = snafu.goni(Xs, numnodes, fitinfo=fitinfo)
        if method == "chan":
            graph = snafu.chan(Xs, numnodes)
        if method == "kenett":
            graph = snafu.kenett(Xs, numnodes)
        if method == "fe":
            graph = snafu.firstEdge(Xs, numnodes)
        if method == "uinvite_flat":
            # 'toydata' is assumed to be defined earlier in the full script
            graph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo)
Exemple #6
0
### What does this script do?
###
### 1) Import the USF network from disk
### 2) Generate toy data (censored random walks) from that network for many pseudo-participants
### 3) Estimate the best network from the data using one or several methods, showing how the method
###    improves as more participants are added
### 4) Write data about those graphs to a CSV (cost, SDT measures)

import snafu
import networkx as nx
import numpy as np

# Participants S101..S150 from the spring 2017 animal-fluency data set.
subs = ["S%d" % i for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read data from file, preserving hierarchical structure
Xs, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=False,
    spellfile="spellfiles/zemla_spellfile.csv")

# for each participant, average cluster size and average number of
# perseverations across that participant's lists
cluster_sizes = [snafu.avgClusterSize(subject_lists) for subject_lists in Xs]
perseverations = [snafu.avgNumPerseverations(subject_lists) for subject_lists in Xs]
Exemple #7
0
def data_properties(command, root_path):
    """Summarize fluency-list data for the snafu GUI.

    Computes per-subject (or per-group) cluster, intrusion, and perseveration
    statistics and returns them in a dict keyed for the GUI.

    Parameters:
        command: dict containing a 'data_parameters' sub-dict (factor_type,
            subject, group, category, fullpath, spellfile, cluster_scheme,
            cluster_type).
        root_path: base directory used to resolve scheme/spellfile labels.

    Returns:
        dict of statistics; "type" is "data_properties".

    Raises:
        ValueError: if factor_type is neither "subject" nor "group".
    """

    # turns array into string: "Mean (Std) [Min -- Max]"
    def format_output(x):
        x_mean = str(round(np.mean(x), 2))
        x_std = str(round(np.std(x), 2))
        x_min = str(round(np.min(x), 2))
        x_max = str(round(np.max(x), 2))

        return x_mean + " (" + x_std + ") [" + x_min + " -- " + x_max + "]"

    command = command['data_parameters']

    if command['factor_type'] == "subject":
        # embed each return variable in a list so the format matches the
        # factor=group case below
        Xs, items, irts, numnodes = [
            [i] for i in snafu.readX(command['subject'],
                                     command['category'],
                                     command['fullpath'],
                                     spellfile=label_to_filepath(
                                         command['spellfile'], root_path,
                                         "spellfiles"))
        ]
    elif command['factor_type'] == "group":
        Xs, items, irts, numnodes, groupitems, groupnumnodes = snafu.readX(
            command['subject'],
            command['category'],
            command['fullpath'],
            spellfile=label_to_filepath(command['spellfile'], root_path,
                                        "spellfiles"),
            group=command['group'])
    else:
        # previously fell through and failed later with a NameError on 'Xs'
        raise ValueError("unknown factor_type: " + str(command['factor_type']))

    # initialize accumulators (one entry per subject)
    avg_cluster_size = []
    avg_num_cluster_switches = []
    num_lists = []
    avg_items_listed = []
    avg_unique_items_listed = []
    intrusions = []
    avg_num_intrusions = []
    perseverations = []
    avg_num_perseverations = []

    # resolve the scheme path once; it is loop-invariant (was computed twice
    # per subject inside the loop)
    if command['cluster_scheme'] != "None":
        scheme_path = label_to_filepath(command['cluster_scheme'], root_path,
                                        "schemes")

    for subjnum in range(len(Xs)):
        Xs[subjnum] = snafu.numToAnimal(Xs[subjnum], items[subjnum])
        if command['cluster_scheme'] != "None":
            cluster_sizes = snafu.clusterSize(
                Xs[subjnum], scheme_path,
                clustertype=command['cluster_type'])
            avg_cluster_size.append(snafu.avgClusterSize(cluster_sizes))
            avg_num_cluster_switches.append(
                snafu.avgNumClusterSwitches(cluster_sizes))
            intrusions.append(snafu.intrusions(Xs[subjnum], scheme_path))
            avg_num_intrusions.append(snafu.avgNumIntrusions(intrusions[-1]))
            perseverations.append(snafu.perseverations(Xs[subjnum]))
            avg_num_perseverations.append(
                snafu.avgNumPerseverations(Xs[subjnum]))
        else:
            # no scheme selected: cluster/intrusion measures are undefined
            avg_cluster_size = ["n/a"]
            avg_num_cluster_switches = ["n/a"]
            avg_num_intrusions = ["n/a"]
            avg_num_perseverations = ["n/a"]
        num_lists.append(len(Xs[subjnum]))
        avg_items_listed.append(np.mean([len(i) for i in Xs[subjnum]]))
        avg_unique_items_listed.append(
            np.mean([len(set(i)) for i in Xs[subjnum]]))

    # clean up / format data to send back
    intrusions = snafu.flatten_list(intrusions)
    perseverations = snafu.flatten_list(perseverations)

    # with more than one subject, collapse per-subject lists into summary strings
    if len(Xs) > 1:
        if command['cluster_scheme'] != "None":
            avg_cluster_size = format_output(avg_cluster_size)
            avg_num_cluster_switches = format_output(avg_num_cluster_switches)
            avg_num_intrusions = format_output(avg_num_intrusions)
            avg_num_perseverations = format_output(avg_num_perseverations)
        num_lists = format_output(num_lists)
        avg_items_listed = format_output(avg_items_listed)
        avg_unique_items_listed = format_output(avg_unique_items_listed)

    return {
        "type": "data_properties",
        "num_lists": num_lists,
        "avg_items_listed": avg_items_listed,
        "intrusions": intrusions,
        "perseverations": perseverations,
        "avg_num_intrusions": avg_num_intrusions,
        "avg_num_perseverations": avg_num_perseverations,
        "avg_unique_items_listed": avg_unique_items_listed,
        "avg_num_cluster_switches": avg_num_cluster_switches,
        "avg_cluster_size": avg_cluster_size
    }
Exemple #8
0
def network_properties(command, root_path):
    """Estimate a semantic network from fluency data and summarize it.

    Parameters:
        command: dict with 'data_parameters' and 'network_parameters' sub-dicts.
        root_path: base directory used to resolve file labels.

    Returns:
        dict with mean node degree, clustering coefficient, average shortest
        path length (or "disjointed graph" if disconnected), and a JSON
        rendering of the graph.

    Raises:
        ValueError: on an unknown factor_type or network_method.
    """
    subj_props = command['data_parameters']
    command = command['network_parameters']

    # U-INVITE won't work with perseverations, so strip them for that method only
    removePerseverations = (command['network_method'] == "U-INVITE")

    if subj_props['factor_type'] == "subject":
        Xs, items, irts, numnodes = snafu.readX(
            subj_props['subject'],
            subj_props['category'],
            subj_props['fullpath'],
            spellfile=label_to_filepath(subj_props['spellfile'], root_path,
                                        "spellfiles"),
            removePerseverations=removePerseverations)
    elif subj_props['factor_type'] == "group":
        Xs, items, irts, numnodes = snafu.readX(
            subj_props['subject'],
            subj_props['category'],
            subj_props['fullpath'],
            spellfile=label_to_filepath(subj_props['spellfile'], root_path,
                                        "spellfiles"),
            removePerseverations=removePerseverations,
            group=subj_props['group'],
            flatten=True)
    else:
        # previously fell through and failed later with a NameError on 'Xs'
        raise ValueError("unknown factor_type: " + str(subj_props['factor_type']))

    toydata = snafu.Data({
        'numx': len(Xs),
        'trim': 1,
        'jump': float(command['jump_probability']),
        'jumptype': command['jump_type'],
        'priming': float(command['priming_probability']),
        'startX': command['first_item']
    })
    fitinfo = snafu.Fitinfo({
        'prior_method': "betabinomial",
        'prior_a': 1,
        'prior_b': 1,
        'startGraph': command['starting_graph'],
        'goni_size': int(command['goni_windowsize']),
        'goni_threshold': int(command['goni_threshold']),
        'followtype': "avg",
        'prune_limit': 100,
        'triangle_limit': 100,
        'other_limit': 100
    })

    # Default to no prior; previously an unrecognized value left 'prior' unbound.
    prior = None
    if command['prior'] == "USF":
        usf_file_path = "/snet/USF_animal_subset.snet"
        filename = root_path + usf_file_path

        usf_graph, usf_items = snafu.read_graph(filename)
        usf_numnodes = len(usf_items)
        priordict = snafu.genGraphPrior([usf_graph], [usf_items],
                                        fitinfo=fitinfo)
        prior = (priordict, usf_items)

    method = command['network_method']
    if method == "RW":
        bestgraph = snafu.noHidden(Xs, numnodes)
    elif method == "Goni":
        bestgraph = snafu.goni(Xs, numnodes, td=toydata, valid=0,
                               fitinfo=fitinfo)
    elif method == "Chan":
        bestgraph = snafu.chan(Xs, numnodes)
    elif method == "Kenett":
        bestgraph = snafu.kenett(Xs, numnodes)
    elif method == "FirstEdge":
        bestgraph = snafu.firstEdge(Xs, numnodes)
    elif method == "U-INVITE":
        bestgraph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo,
                                      debug=False, prior=prior)
    else:
        # previously fell through and failed with a NameError on 'bestgraph'
        raise ValueError("unknown network_method: " + str(method))

    nxg = nx.to_networkx_graph(bestgraph)

    # dict_values is not array-like under Python 3; materialize it before np.mean
    node_degree = np.mean(list(dict(nxg.degree()).values()))
    nxg_json = jsonGraph(nxg, items)
    clustering_coefficient = nx.average_clustering(nxg)
    try:
        aspl = nx.average_shortest_path_length(nxg)
    except nx.NetworkXError:
        # raised when the graph is not connected -- no finite path length
        aspl = "disjointed graph"

    return {
        "type": "network_properties",
        "node_degree": node_degree,
        "clustering_coefficient": clustering_coefficient,
        "aspl": aspl,
        "graph": nxg_json
    }
### What does this script do?
### 
### 1) Import the USF network from disk
### 2) Generate toy data (censored random walks) from that network for many pseudo-participants
### 3) Estimate the best network from the data using one or several methods, showing how the method
###    improves as more participants are added
### 4) Write data about those graphs to a CSV (cost, SDT measures)


import snafu
import networkx as nx
import numpy as np

# Participants S101..S150 from the spring 2017 animal-fluency data set.
subs = ["S%d" % i for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read data from file, preserving hierarchical structure
Xs, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, filepath,
    category=category,
    removePerseverations=False,
    spellfile="spellfiles/zemla_spellfile.csv")

# for each participant, average cluster size and average number of
# perseverations across that participant's lists
cluster_sizes = [snafu.avgClusterSize(subject_lists) for subject_lists in Xs]
perseverations = [snafu.avgNumPerseverations(subject_lists) for subject_lists in Xs]

        'zibb_p': 0.5,
        'prior_a': 2,
        'prior_b': 1,
        'goni_size': 2,
        'goni_threshold': 2,
        'followtype': "avg", 
        'prune_limit': 100,
        'triangle_limit': 100,
        'other_limit': 100 })

# Participants S101..S150, "tools" category fluency data.
subs=["S"+str(i) for i in range(101,151)]
filepath = "fluency/spring2017.csv"
category="tools"

# read in data from file (perseverations removed, spellings normalized);
# flat vs. hierarchical views are selected below
filedata = snafu.readX(subs,filepath,category=category,removePerseverations=True,spellfile="spellfiles/tools_zemla_spellfile.csv")

# flat view: all participants' lists pooled together
filedata.nonhierarchical()
Xs_flat = filedata.Xs
groupnumnodes = filedata.numnodes
groupitems = filedata.groupitems

# hierarchical view: lists kept grouped per participant
filedata.hierarchical()
Xs_hier = filedata.Xs
items = filedata.items
numnodes = filedata.numnodes

graphs=[]
for method in methods:

    # Estimate the best network using a Naive Random Walk