def generate_csv_file(command, root_path):
    csv_file = "id,listnum,num_items_listed,num_unique_items,num_cluster_switches,avg_cluster_size,num_intrusions,num_perseverations,avg_word_freq,avg_word_aoa\n"

    # parameters should come from snafu gui (ids, filename, category, scheme)
    # filedata = snafu.readX(ids, command['fullpath'], category=command['category'], spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"), group=group)
    data = snafu.readX('all', command['fullpath'],
                       category=command['category'],
                       scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes"),
                       spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"),
                       group=True)
    data.hierarchical()

    for subnum, sub in enumerate(data.subs):
        # convert the fluency lists from integer codes to item labels
        labeled_lists = snafu.numToItemLabel(data.Xs[subnum], data.items[subnum])
        for listnum in range(len(data.Xs[subnum])):
            csv_sub = sub
            csv_listnum = listnum
            csv_numitems = len(data.Xs[subnum][listnum])
            csv_uniqueitem = len(set(data.Xs[subnum][listnum]))

            # parameters should come from snafu gui (scheme, clustertype)
            clustersizes = snafu.clusterSize(labeled_lists[listnum],
                                             scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes"),
                                             clustertype=command['cluster_type'])
            csv_clusterswitch = len(clustersizes) - 1
            csv_clusterlength = snafu.avgClusterSize(clustersizes)

            # parameters should come from snafu gui (scheme)
            csv_intrusions = len(snafu.intrusions(labeled_lists[listnum],
                                                  scheme=label_to_filepath(command['cluster_scheme'], root_path, "schemes")))
            csv_perseverations = len(snafu.perseverations(labeled_lists[listnum]))
            csv_freq, _ = snafu.getWordFreq([labeled_lists[listnum]], freq_sub=float(command['freq_sub']))
            csv_aoa, _ = snafu.getWordAoa([labeled_lists[listnum]])

            csv_file += ','.join([str(csv_sub), str(csv_listnum), str(csv_numitems),
                                  str(csv_uniqueitem), str(csv_clusterswitch),
                                  str(round(csv_clusterlength, 2)), str(csv_intrusions),
                                  str(csv_perseverations), str(round(csv_freq, 2)),
                                  str(round(csv_aoa, 2))]) + '\n'
    return csv_file
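# A minimal usage sketch for generate_csv_file. The dictionary keys mirror
# exactly what the function reads above, but every concrete value here
# (paths, labels, root_path) is hypothetical and would normally come from
# the snafu GUI.
#
# command = {
#     'fullpath': '/data/fluency/spring2017.csv',   # hypothetical data file
#     'category': 'animals',
#     'cluster_scheme': 'animal_scheme',            # hypothetical GUI label
#     'cluster_type': 'static',                     # hypothetical value
#     'spellfile': 'zemla_spellfile',               # hypothetical GUI label
#     'freq_sub': '0.5',                            # hypothetical substitution value
# }
# csv_string = generate_csv_file(command, root_path='/path/to/snafu')
# with open('fluency_measures.csv', 'w') as f:
#     f.write(csv_string)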
def data_properties(command, root_path):
    # turns an array into the string "Mean (Std) [Min -- Max]"
    def format_output(x):
        x_mean = str(round(np.mean(x), 2))
        x_std = str(round(np.std(x), 2))
        x_min = str(round(np.min(x), 2))
        x_max = str(round(np.max(x), 2))
        return x_mean + " (" + x_std + ") [" + x_min + " -- " + x_max + "]"

    command = command['data_parameters']

    if command['factor_type'] == "subject":
        ids = str(command['subject'])
        group = False
    elif command['factor_type'] == "group":
        ids = str(command['group'])  # without str(), "all" causes unicode issues :(
        group = True

    filedata = snafu.readX(ids, command['fullpath'],
                           category=command['category'],
                           spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"),
                           group=group)
    filedata.hierarchical()

    Xs = filedata.Xs
    items = filedata.items
    irts = filedata.irts
    numnodes = filedata.numnodes

    # initialize
    avg_cluster_size = []
    avg_num_cluster_switches = []
    num_lists = []
    avg_items_listed = []
    avg_unique_items_listed = []
    intrusions = []
    avg_num_intrusions = []
    perseverations = []
    avg_num_perseverations = []

    snafu.wordSetup(freq_sub=float(command['freq_sub']),
                    freqfile=label_to_filepath(command['freqfile'], root_path, "frequency"),
                    aoafile=label_to_filepath(command['aoafile'], root_path, "aoa"))

    total_words = 0
    avg_word_freq = []
    word_freq_excluded = []
    avg_word_aoa = []
    word_aoa_excluded = []

    # compute per-subject statistics; kinda messy...
    for subjnum in range(len(Xs)):
        Xs[subjnum] = snafu.numToAnimal(Xs[subjnum], items[subjnum])
        if command['cluster_scheme'] != "None":
            cluster_sizes = snafu.clusterSize(Xs[subjnum],
                                              label_to_filepath(command['cluster_scheme'], root_path, "schemes"),
                                              clustertype=command['cluster_type'])
            avg_cluster_size.append(snafu.avgClusterSize(cluster_sizes))
            avg_num_cluster_switches.append(snafu.avgNumClusterSwitches(cluster_sizes))
            intrusions.append(snafu.intrusions(Xs[subjnum],
                                               label_to_filepath(command['cluster_scheme'], root_path, "schemes")))
            avg_num_intrusions.append(snafu.avgNumIntrusions(intrusions[-1]))
            perseverations.append(snafu.perseverations(Xs[subjnum]))
            avg_num_perseverations.append(snafu.avgNumPerseverations(Xs[subjnum]))
        else:
            avg_cluster_size = ["n/a"]
            avg_num_cluster_switches = ["n/a"]
            avg_num_intrusions = ["n/a"]
            avg_num_perseverations = ["n/a"]
        num_lists.append(len(Xs[subjnum]))
        avg_items_listed.append(np.mean([len(i) for i in Xs[subjnum]]))
        avg_unique_items_listed.append(np.mean([len(set(i)) for i in Xs[subjnum]]))

        tmp1, tmp2 = snafu.getWordFreq(Xs[subjnum], freq_sub=float(command['freq_sub']))
        avg_word_freq.append(tmp1)
        for i in tmp2:
            word_freq_excluded.append(i)
        tmp1, tmp2 = snafu.getWordAoa(Xs[subjnum])
        avg_word_aoa.append(tmp1)
        for i in tmp2:
            word_aoa_excluded.append(i)
        for i in Xs[subjnum]:
            total_words += len(i)

    # clean up / format data to send back, still messy
    intrusions = snafu.flatten_list(intrusions)
    perseverations = snafu.flatten_list(perseverations)

    if len(Xs) > 1:
        if command['cluster_scheme'] != "None":
            avg_cluster_size = format_output(avg_cluster_size)
            avg_num_cluster_switches = format_output(avg_num_cluster_switches)
            avg_num_intrusions = format_output(avg_num_intrusions)
            avg_num_perseverations = format_output(avg_num_perseverations)
        num_lists = format_output(num_lists)
        avg_items_listed = format_output(avg_items_listed)
        avg_unique_items_listed = format_output(avg_unique_items_listed)
        avg_word_freq = format_output(avg_word_freq)
        avg_word_aoa = format_output(avg_word_aoa)

    word_freq_rate = 0
    for i in word_freq_excluded:
        word_freq_rate += len(i)
    word_freq_rate = str(round(float(word_freq_rate) / total_words * 100, 2)) + '%'

    word_aoa_rate = 0
    for i in word_aoa_excluded:
        word_aoa_rate += len(i)
    word_aoa_rate = str(round(float(word_aoa_rate) / total_words * 100, 2)) + '%'

    csv_file = generate_csv_file(command, root_path)

    return {
        "type": "data_properties",
        "num_lists": num_lists,
        "avg_items_listed": avg_items_listed,
        "intrusions": intrusions,
        "perseverations": perseverations,
        "avg_num_intrusions": avg_num_intrusions,
        "avg_num_perseverations": avg_num_perseverations,
        "avg_unique_items_listed": avg_unique_items_listed,
        "avg_num_cluster_switches": avg_num_cluster_switches,
        "avg_cluster_size": avg_cluster_size,
        "avg_word_freq": avg_word_freq,
        "avg_word_aoa": avg_word_aoa,
        "word_freq_rate": word_freq_rate,
        "word_freq_excluded": word_freq_excluded,
        "word_aoa_rate": word_aoa_rate,
        "word_aoa_excluded": word_aoa_excluded,
        "csv_file": csv_file
    }
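# Standalone illustration of what format_output produces (numpy only; the
# input values are made up). np.std uses the population standard deviation.
import numpy as np

def format_output(x):
    x_mean = str(round(np.mean(x), 2))
    x_std = str(round(np.std(x), 2))
    x_min = str(round(np.min(x), 2))
    x_max = str(round(np.max(x), 2))
    return x_mean + " (" + x_std + ") [" + x_min + " -- " + x_max + "]"

print(format_output([14, 19, 23]))  # -> "18.67 (3.68) [14 -- 23]"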
def network_properties(command, root_path):
    subj_props = command['data_parameters']
    command = command['network_parameters']

    # U-INVITE won't work with perseverations
    if command['network_method'] == "U-INVITE":
        removePerseverations = True
    else:
        removePerseverations = False

    if subj_props['factor_type'] == "subject":
        ids = str(subj_props['subject'])
        group = False
    elif subj_props['factor_type'] == "group":
        ids = str(subj_props['group'])  # without str(), "all" causes unicode issues :(
        group = True

    filedata = snafu.readX(ids, subj_props['fullpath'],
                           category=subj_props['category'],
                           spellfile=label_to_filepath(subj_props['spellfile'], root_path, "spellfiles"),
                           removePerseverations=removePerseverations,
                           group=group)
    filedata.nonhierarchical()

    Xs = filedata.Xs
    items = filedata.items
    irts = filedata.irts
    numnodes = filedata.numnodes

    toydata = snafu.DataModel({
        'numx': len(Xs),
        'trim': 1,
        'jump': float(command['jump_probability']),
        'jumptype': command['jump_type'],
        'priming': float(command['priming_probability']),
        'startX': command['first_item']})

    fitinfo = snafu.Fitinfo({
        'prior_method': "zeroinflatedbetabinomial",
        'prior_a': 1,
        'prior_b': 2,
        'zibb_p': 0.5,
        'startGraph': command['starting_graph'],
        'goni_size': int(command['goni_windowsize']),
        'goni_threshold': int(command['goni_threshold']),
        'followtype': "avg",
        'prune_limit': 100,
        'triangle_limit': 100,
        'other_limit': 100})

    if command['prior'] == "None":
        prior = None
    elif command['prior'] == "USF":
        usf_file_path = "/snet/USF_animal_subset.snet"
        filename = root_path + usf_file_path
        usf_graph, usf_items = snafu.read_graph(filename)
        usf_numnodes = len(usf_items)
        priordict = snafu.genGraphPrior([usf_graph], [usf_items], fitinfo=fitinfo)
        prior = (priordict, usf_items)

    if command['network_method'] == "RW":
        bestgraph = snafu.noHidden(Xs, numnodes)
    elif command['network_method'] == "Goni":
        bestgraph = snafu.goni(Xs, numnodes, td=toydata, valid=0, fitinfo=fitinfo)
    elif command['network_method'] == "Chan":
        bestgraph = snafu.chan(Xs, numnodes)
    elif command['network_method'] == "Kenett":
        bestgraph = snafu.kenett(Xs, numnodes)
    elif command['network_method'] == "FirstEdge":
        bestgraph = snafu.firstEdge(Xs, numnodes)
    elif command['network_method'] == "U-INVITE":
        bestgraph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo,
                                      debug=False, prior=prior)

    nxg = nx.to_networkx_graph(bestgraph)
    nxg_json = jsonGraph(nxg, items)
    return graph_properties(nxg, nxg_json)
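# The if/elif chain above could equivalently be written as a dispatch table.
# A sketch only: it reuses the same snafu calls and assumes the surrounding
# variables of network_properties (Xs, numnodes, toydata, fitinfo, prior).
#
# import functools
#
# estimators = {
#     "RW": snafu.noHidden,
#     "Chan": snafu.chan,
#     "Kenett": snafu.kenett,
#     "FirstEdge": snafu.firstEdge,
#     "Goni": functools.partial(snafu.goni, td=toydata, valid=0, fitinfo=fitinfo),
# }
#
# method = command['network_method']
# if method == "U-INVITE":
#     # handled separately: extra arguments and a second return value
#     bestgraph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo,
#                                   debug=False, prior=prior)
# else:
#     bestgraph = estimators[method](Xs, numnodes)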
    'zibb_p': 0.5,
    'prior_a': 2,
    'prior_b': 1,
    'goni_size': 2,
    'goni_threshold': 2,
    'followtype': "avg",
    'prune_limit': np.inf,
    'triangle_limit': np.inf,
    'other_limit': np.inf})

subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read in data from file, flattening all participants together
Xs_flat, groupitems, irtdata, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=True,
    spellfile="spellfiles/zemla_spellfile.csv",
    flatten=True)

# read data from file, preserving hierarchical structure
Xs_hier, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=True,
    spellfile="spellfiles/zemla_spellfile.csv")

graphs = []
for method in methods:
    # Estimate the best network using a Naive Random Walk
    if method == "rw":
        graph = snafu.nrw(Xs_flat, groupnumnodes)
    # Estimate the best network using Goni
    if method == "goni":
        graph = snafu.goni(Xs_flat, groupnumnodes, fitinfo=fitinfo)
# the hierarchical model will take a long time to run! to test it, you can fit
# a smaller number of participants, e.g. range(101, 111)
subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

fo = open('individual_graphs.csv', 'w')
fo.write('subj,method,item1,item2,edge\n')

for method in methods:    # add snafu.hierarchicalUinvite method here
    for sub in subs:
        Xs, items, irtdata, numnodes = snafu.readX(
            sub, category, filepath,
            removePerseverations=True,
            spellfile="spellfiles/zemla_spellfile.csv")

        if method == "rw":
            graph = snafu.nrw(Xs, numnodes)
        if method == "goni":
            graph = snafu.goni(Xs, numnodes, fitinfo=fitinfo)
        if method == "chan":
            graph = snafu.chan(Xs, numnodes)
        if method == "kenett":
            graph = snafu.kenett(Xs, numnodes)
        if method == "fe":
            graph = snafu.firstEdge(Xs, numnodes)
        if method == "uinvite_flat":
            graph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo)
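# Hedged sketch of the write-out step implied by the CSV header above
# ('subj,method,item1,item2,edge'). It would sit inside the inner loop, after
# `graph` is estimated. It assumes `graph` is a square 0/1 adjacency matrix
# and `items` maps a node index to its label, as in the other snippets here;
# neither assumption is confirmed by this excerpt.
#
#         for i, j in zip(*np.triu_indices(len(items), k=1)):  # each pair once
#             fo.write(','.join([sub, method, items[i], items[j],
#                                str(int(graph[i, j]))]) + '\n')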
### What does this script do? ###
### 1) Read fluency data for many participants from disk
### 2) For each participant, compute the average cluster size and the average
###    number of perseverations across that participant's lists

import snafu
import networkx as nx
import numpy as np

subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read data from file, preserving hierarchical structure
Xs, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, category, filepath,
    removePerseverations=False,
    spellfile="spellfiles/zemla_spellfile.csv")

# for each participant, calculate the average cluster size and average number
# of perseverations across that participant's lists
cluster_sizes = []
perseverations = []
for i in range(len(Xs)):
    cluster_sizes.append(snafu.avgClusterSize(Xs[i]))
    perseverations.append(snafu.avgNumPerseverations(Xs[i]))
def data_properties(command, root_path):
    # turns an array into the string "Mean (Std) [Min -- Max]"
    def format_output(x):
        x_mean = str(round(np.mean(x), 2))
        x_std = str(round(np.std(x), 2))
        x_min = str(round(np.min(x), 2))
        x_max = str(round(np.max(x), 2))
        return x_mean + " (" + x_std + ") [" + x_min + " -- " + x_max + "]"

    command = command['data_parameters']

    if command['factor_type'] == "subject":
        # embed each return variable in a list so the format matches factor_type == "group"
        Xs, items, irts, numnodes = [
            [i] for i in snafu.readX(
                command['subject'], command['category'], command['fullpath'],
                spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"))
        ]
    elif command['factor_type'] == "group":
        Xs, items, irts, numnodes, groupitems, groupnumnodes = snafu.readX(
            command['subject'], command['category'], command['fullpath'],
            spellfile=label_to_filepath(command['spellfile'], root_path, "spellfiles"),
            group=command['group'])

    # initialize
    avg_cluster_size = []
    avg_num_cluster_switches = []
    num_lists = []
    avg_items_listed = []
    avg_unique_items_listed = []
    intrusions = []
    avg_num_intrusions = []
    perseverations = []
    avg_num_perseverations = []

    # compute per-subject statistics; kinda messy...
    for subjnum in range(len(Xs)):
        Xs[subjnum] = snafu.numToAnimal(Xs[subjnum], items[subjnum])
        if command['cluster_scheme'] != "None":
            cluster_sizes = snafu.clusterSize(
                Xs[subjnum],
                label_to_filepath(command['cluster_scheme'], root_path, "schemes"),
                clustertype=command['cluster_type'])
            avg_cluster_size.append(snafu.avgClusterSize(cluster_sizes))
            avg_num_cluster_switches.append(snafu.avgNumClusterSwitches(cluster_sizes))
            intrusions.append(snafu.intrusions(
                Xs[subjnum],
                label_to_filepath(command['cluster_scheme'], root_path, "schemes")))
            avg_num_intrusions.append(snafu.avgNumIntrusions(intrusions[-1]))
            perseverations.append(snafu.perseverations(Xs[subjnum]))
            avg_num_perseverations.append(snafu.avgNumPerseverations(Xs[subjnum]))
        else:
            avg_cluster_size = ["n/a"]
            avg_num_cluster_switches = ["n/a"]
            avg_num_intrusions = ["n/a"]
            avg_num_perseverations = ["n/a"]
        num_lists.append(len(Xs[subjnum]))
        avg_items_listed.append(np.mean([len(i) for i in Xs[subjnum]]))
        avg_unique_items_listed.append(np.mean([len(set(i)) for i in Xs[subjnum]]))

    # clean up / format data to send back, still messy
    intrusions = snafu.flatten_list(intrusions)
    perseverations = snafu.flatten_list(perseverations)

    if len(Xs) > 1:
        if command['cluster_scheme'] != "None":
            avg_cluster_size = format_output(avg_cluster_size)
            avg_num_cluster_switches = format_output(avg_num_cluster_switches)
            avg_num_intrusions = format_output(avg_num_intrusions)
            avg_num_perseverations = format_output(avg_num_perseverations)
        num_lists = format_output(num_lists)
        avg_items_listed = format_output(avg_items_listed)
        avg_unique_items_listed = format_output(avg_unique_items_listed)

    return {
        "type": "data_properties",
        "num_lists": num_lists,
        "avg_items_listed": avg_items_listed,
        "intrusions": intrusions,
        "perseverations": perseverations,
        "avg_num_intrusions": avg_num_intrusions,
        "avg_num_perseverations": avg_num_perseverations,
        "avg_unique_items_listed": avg_unique_items_listed,
        "avg_num_cluster_switches": avg_num_cluster_switches,
        "avg_cluster_size": avg_cluster_size
    }
def network_properties(command, root_path):
    subj_props = command['data_parameters']
    command = command['network_parameters']

    # U-INVITE won't work with perseverations
    if command['network_method'] == "U-INVITE":
        removePerseverations = True
    else:
        removePerseverations = False

    if subj_props['factor_type'] == "subject":
        Xs, items, irts, numnodes = snafu.readX(
            subj_props['subject'], subj_props['category'], subj_props['fullpath'],
            spellfile=label_to_filepath(subj_props['spellfile'], root_path, "spellfiles"),
            removePerseverations=removePerseverations)
    elif subj_props['factor_type'] == "group":
        Xs, items, irts, numnodes = snafu.readX(
            subj_props['subject'], subj_props['category'], subj_props['fullpath'],
            spellfile=label_to_filepath(subj_props['spellfile'], root_path, "spellfiles"),
            removePerseverations=removePerseverations,
            group=subj_props['group'],
            flatten=True)

    toydata = snafu.Data({
        'numx': len(Xs),
        'trim': 1,
        'jump': float(command['jump_probability']),
        'jumptype': command['jump_type'],
        'priming': float(command['priming_probability']),
        'startX': command['first_item']
    })
    fitinfo = snafu.Fitinfo({
        'prior_method': "betabinomial",
        'prior_a': 1,
        'prior_b': 1,
        'startGraph': command['starting_graph'],
        'goni_size': int(command['goni_windowsize']),
        'goni_threshold': int(command['goni_threshold']),
        'followtype': "avg",
        'prune_limit': 100,
        'triangle_limit': 100,
        'other_limit': 100
    })

    if command['prior'] == "None":
        prior = None
    elif command['prior'] == "USF":
        usf_file_path = "/snet/USF_animal_subset.snet"
        filename = root_path + usf_file_path
        usf_graph, usf_items = snafu.read_graph(filename)
        usf_numnodes = len(usf_items)
        priordict = snafu.genGraphPrior([usf_graph], [usf_items], fitinfo=fitinfo)
        prior = (priordict, usf_items)

    if command['network_method'] == "RW":
        bestgraph = snafu.noHidden(Xs, numnodes)
    elif command['network_method'] == "Goni":
        bestgraph = snafu.goni(Xs, numnodes, td=toydata, valid=0, fitinfo=fitinfo)
    elif command['network_method'] == "Chan":
        bestgraph = snafu.chan(Xs, numnodes)
    elif command['network_method'] == "Kenett":
        bestgraph = snafu.kenett(Xs, numnodes)
    elif command['network_method'] == "FirstEdge":
        bestgraph = snafu.firstEdge(Xs, numnodes)
    elif command['network_method'] == "U-INVITE":
        bestgraph, ll = snafu.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo,
                                      debug=False, prior=prior)

    nxg = nx.to_networkx_graph(bestgraph)
    # dict.values() is a view in Python 3; convert to a list before averaging
    node_degree = np.mean(list(dict(nxg.degree()).values()))
    nxg_json = jsonGraph(nxg, items)

    clustering_coefficient = nx.average_clustering(nxg)
    try:
        aspl = nx.average_shortest_path_length(nxg)
    except nx.NetworkXError:  # raised when the graph is not connected
        aspl = "disjointed graph"

    return {
        "type": "network_properties",
        "node_degree": node_degree,
        "clustering_coefficient": clustering_coefficient,
        "aspl": aspl,
        "graph": nxg_json
    }
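# Standalone illustration of the three graph measures returned above, computed
# on a small toy graph (networkx only; no snafu data needed).
import networkx as nx

g = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
node_degree = sum(dict(g.degree()).values()) / g.number_of_nodes()
clustering_coefficient = nx.average_clustering(g)
try:
    aspl = nx.average_shortest_path_length(g)
except nx.NetworkXError:
    aspl = "disjointed graph"

print(node_degree, clustering_coefficient, aspl)  # -> 2.0, ~0.583, ~1.333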
### What does this script do? ###
### 1) Read fluency data for many participants from disk
### 2) For each participant, compute the average cluster size and the average
###    number of perseverations across that participant's lists

import snafu
import networkx as nx
import numpy as np

subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "animals"

# read data from file, preserving hierarchical structure
Xs, items, irtdata, numnodes, groupitems, groupnumnodes = snafu.readX(
    subs, filepath, category=category,
    removePerseverations=False,
    spellfile="spellfiles/zemla_spellfile.csv")

# for each participant, calculate the average cluster size and average number
# of perseverations across that participant's lists
cluster_sizes = []
perseverations = []
for i in range(len(Xs)):
    cluster_sizes.append(snafu.avgClusterSize(Xs[i]))
    perseverations.append(snafu.avgNumPerseverations(Xs[i]))
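# Continuing the script above: a simple group-level summary of the
# per-participant measures just computed (uses the numpy import above).
print("mean cluster size:       %.2f" % np.mean(cluster_sizes))
print("mean num perseverations: %.2f" % np.mean(perseverations))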
    'zibb_p': 0.5,
    'prior_a': 2,
    'prior_b': 1,
    'goni_size': 2,
    'goni_threshold': 2,
    'followtype': "avg",
    'prune_limit': 100,
    'triangle_limit': 100,
    'other_limit': 100
})

subs = ["S" + str(i) for i in range(101, 151)]
filepath = "fluency/spring2017.csv"
category = "tools"

# read in data from file, flattening all participants together
filedata = snafu.readX(subs, filepath, category=category,
                       removePerseverations=True,
                       spellfile="spellfiles/tools_zemla_spellfile.csv")
filedata.nonhierarchical()
Xs_flat = filedata.Xs
groupnumnodes = filedata.numnodes
groupitems = filedata.groupitems

# re-read the same data, preserving the hierarchical structure
filedata.hierarchical()
Xs_hier = filedata.Xs
items = filedata.items
numnodes = filedata.numnodes

graphs = []
for method in methods:
    # Estimate the best network using a Naive Random Walk