def individual_reciprocity_analysis(labelled_data, pid_dict, location_to_store):
    reciprocity_info = {}
    ff = filterfields()
    ff.setdata(labelled_data)
    polarity_data = {}
    for pid in pid_dict:
        print 'Working with PID: ', pid, '(', pid_dict[pid], ')'
        messages_by_participant = ff.filterbyequality(pr.m_source, pid)
        messages_to_participant = ff.filterbyequality(pr.m_target, pid)
        polarity_data[pid] = __get_polarity_composition(messages_by_participant + messages_to_participant, pid)
        reciprocity_info[pid] = {}
        n = len(messages_by_participant)
        idx = 0
        for message in messages_by_participant:
            print 'idx=' + str(idx) + '/' + str(n)
            idx += 1
            closest_message = find_closest_message(message, messages_to_participant, ff)
            target_type = 'P' if message[pr.m_target_type] == 'participant' else 'NP'
            target = message[pr.m_target]
            if target_type not in reciprocity_info[pid]:
                reciprocity_info[pid][target_type] = {}
            if target not in reciprocity_info[pid][target_type]:
                reciprocity_info[pid][target_type][target] = __basic_reciprocity_dict()
            sent_message_type = message[-1]
            reply_message_type = 'X' if closest_message is None else closest_message[-1]
            reciprocity_info[pid][target_type][target][sent_message_type][reply_message_type] += 1
        # checkpoint after each participant so a crash does not lose completed work
        print 'saving checkpoint...'
        hlp.dumpvariable([reciprocity_info, pid, pid_dict], 'checkpoint.chp', location_to_store)
        print 'saved!'
    return reciprocity_info, polarity_data
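# The per-target counters above come from __basic_reciprocity_dict(), which is not shown in this
# section. A minimal sketch of its assumed shape, inferred from how it is indexed here (sent label
# P/N/U, reply label P/N/U/X, with 'X' meaning no reply) and from the literal used in
# find_reciprocity(); the actual helper in the repository may differ.
def __basic_reciprocity_dict_sketch():
    return {'P': {'P': 0, 'N': 0, 'U': 0, 'X': 0},
            'N': {'P': 0, 'N': 0, 'U': 0, 'X': 0},
            'U': {'P': 0, 'N': 0, 'U': 0, 'X': 0}}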
def find_reciprocity(labelled_data, location_to_store):
    ff = filterfields()
    ff.setdata(labelled_data)
    messages_sent_by_participants = ff.filterbyequality(pr.m_source_type, 'participant')
    reciprocity_dict = {'P': {'P': 0, 'U': 0, 'N': 0, 'X': 0},
                        'N': {'P': 0, 'U': 0, 'N': 0, 'X': 0},
                        'U': {'P': 0, 'U': 0, 'N': 0, 'X': 0}}
    n = len(messages_sent_by_participants)
    idx = 1
    message_pairs = []
    for message in messages_sent_by_participants:
        print 'at message ', idx, ' of ', n
        idx += 1
        reply_message = find_closest_message(message, ff)
        sent_message_type = message[-1]
        if reply_message is None:
            reply_message_type = 'X'
        else:
            reply_message_type = reply_message[-1]
        reciprocity_dict[sent_message_type][reply_message_type] += 1
        message_pairs.append((message, reply_message))
        if 0 == idx % 500:
            print 'saving...'
            hlp.dumpvariable([idx, reciprocity_dict, message_pairs, messages_sent_by_participants],
                             'checkpoint.chp', location_to_store)
    print 'done... out of the loop'
    to_use = {'P': '+', 'N': '-', 'U': 'u', 'X': 'null'}
    for sent_type in reciprocity_dict:
        recvd_types = reciprocity_dict[sent_type]
        for recvd_type in recvd_types:
            print 'N(' + to_use[recvd_type] + '|' + to_use[sent_type] + ')=', recvd_types[recvd_type]
    return reciprocity_dict, message_pairs
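# Illustrative only (not part of the original script): the counts printed above can be turned into
# conditional reply probabilities P(reply polarity | sent polarity). `reciprocity_dict` is assumed
# to have the shape built in find_reciprocity().
def reply_probabilities(reciprocity_dict):
    probs = {}
    for sent_type, received_counts in reciprocity_dict.items():
        total = float(sum(received_counts.values()))
        probs[sent_type] = {recvd_type: (count / total if total else 0.0)
                            for recvd_type, count in received_counts.items()}
    return probs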
def getstats(filepath, participant_dict, p_type, message_type='sms'):
    ff = filterfields(filepath)
    ff.setdata(ff.filterbyequality(pr.m_type, message_type))
    participant_stats = {}
    for participant_id in participant_dict:
        survey_no_list = participant_dict[participant_id]
        p_data = ff.filterbyequality(pr.m_source, participant_id) + \
                 ff.filterbyequality(pr.m_target, participant_id)
        if [] == p_data:
            print 'no data exists for pid: ' + participant_id
            continue
        pid_dict = hlp.getuniqueparticipants(p_data)
        participant_stats[participant_id] = {}
        for survey_no in survey_no_list:
            print 'Participant no.', participant_id, ' S.no.: ', survey_no
            idx = survey_no
            survey_no = survey_no_list[survey_no][0]
            end_date = ff.converttodate(survey_no[sr.s_time])
            start_date = end_date - dt.timedelta(days=7)
            data_between_dates = ff.filterbetweendates(start_date, end_date, data_to_work=p_data)
            original_start_date = ff.converttodate(pr.start_datetime)
            data_start_to_date = ff.filterbetweendates(original_start_date, start_date, data_to_work=p_data)
            between_stats, before_stats = graphstats(data_start_to_date, data_between_dates, participant_id,
                                                     p_type, original_start_date, start_date, ff, pid_dict)
            temp_dict = {'between': between_stats, 'before': before_stats, 'pid_dict': pid_dict}
            # keyed by survey number so stats from earlier surveys of the same participant are kept
            participant_stats[participant_id][idx] = temp_dict
    return participant_stats
def main():
    parser = __define_process_parser()
    old_dataset_file, new_dataset_mapped, missing_data, \
        survey_file, location_to_store = __define_process_parser(True, parser)
    old_dataset = hlp.readcsv(old_dataset_file, delimiter_sym=',', remove_first=True)
    new_dataset = hlp.readcsv(new_dataset_mapped, delimiter_sym=',', remove_first=True)
    old_data_missing = hlp.readcsv(missing_data, delimiter_sym=',', remove_first=True)
    old_missing = __dictify(0, old_data_missing)
    wi = weeklyinfo()
    week_info = wi.getweeklyfo(survey_file)
    week_list = week_info.keys()
    bullying_positives = __find_positive_survey(survey_file, week_info)
    if bullying_positives is None:
        print 'Exiting...'
        exit()
    ff = filterfields()
    old_data_weekly = hlp.divideintoweekly(old_dataset, week_info, ff, date_field=pr.m_time_sent)
    new_data_weekly = hlp.divideintoweekly(new_dataset, week_info, ff, date_field=nd.m_timecreated)
    bullying_res = [['pid_hash', 'survey_id', 'time_of_survey', 'n_old', 'n_new',
                     'raw', 'semi', 'ordered', 'other']]
    for datum in bullying_positives:
        bullying_week = datum[-1]
        # clamp the one-week window on either side of the positive survey to the study period
        prev_week = bullying_week - 1 if bullying_week > min(week_list) else min(week_list)
        next_week = bullying_week + 1 if bullying_week < max(week_list) else max(week_list)
        old_data_pos = old_data_weekly[prev_week] + old_data_weekly[bullying_week] + old_data_weekly[next_week]
        new_data_pos = new_data_weekly[prev_week] + new_data_weekly[bullying_week] + new_data_weekly[next_week]
        pid_hash = datum[s_i.s_participant]
        n_old, n_new, nfr_dict = compare_old_new(old_data_pos, new_data_pos, old_missing, pid_hash, ff)
        temp = [pid_hash, datum[s_i.s_id], datum[s_i.s_time], n_old, n_new,
                nfr_dict['raw'], nfr_dict['semi'], nfr_dict['ordered'], nfr_dict['other']]
        bullying_res.append(temp)
    hlp.writecsv(bullying_res, location_to_store + 'bullying_res.csv', delimiter_sym=',')
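# __dictify() is referenced above but not defined in this section. A minimal sketch under the
# assumption that it keys each csv row by the value in the given column; the real helper may
# instead collect a list of rows per key, so treat this as illustrative only.
def __dictify_sketch(key_column, rows):
    return {row[key_column]: row for row in rows}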
def __find_positive_survey(survey_file, week_info):
    week_no = week_info.keys()
    week_no.sort()
    ff = filterfields()
    s_obj = surveys()
    survey_data = hlp.readcsv(survey_file, delimiter_sym=',')
    n_data = s_obj.interpretanswers(survey_data, True)
    bullying_positives = ff.filterbyequality(s_i.s_qno, '4', data=n_data[1:])
    new_bullying_positives = []
    for datum in bullying_positives:
        datetime_of_survey = ff.converttodate(datum[s_i.s_time])
        found_match = False
        for week in week_no:
            (start_date, end_date) = week_info[week]
            if start_date <= datetime_of_survey <= end_date:
                datum.append(week)
                new_bullying_positives.append(datum)
                found_match = True
                break
        if not found_match:
            print 'Something funky happened...', datum
            return None
    return new_bullying_positives
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--messageFile', type=str, required=True)
    parser.add_argument('-mt', '--messageTypes', type=str, nargs='+')
    parser.add_argument('-o', '--outputFolder', type=str, required=True)
    parser.add_argument('-of', '--outputFile', type=str, required=True)
    parser.add_argument('-pd', '--participantDictionary', type=str)
    parser.add_argument('-i', '--ignoreParticipants', type=str)
    parser.add_argument('-mc', '--messageTypeConvert', type=str, nargs='*')
    args = parser.parse_args()
    message_file = args.messageFile
    message_types = args.messageTypes
    output_folder = args.outputFolder
    output_file = args.outputFile
    pid_dict = args.participantDictionary
    ignore_pids = args.ignoreParticipants
    message_type_conversions = args.messageTypeConvert

    ff = filterfields(message_file)
    ff.setdata(ff.getdata()[1:])
    to_set_data = []

    # extract the relevant data
    for message_type in message_types:
        to_set_data.extend(ff.filterbyequality(pr.m_type, message_type))
    ff.setdata(to_set_data)
    if ignore_pids is not None:
        ignore_pids = hlp.recovervariable(ignore_pids)
        for pid in ignore_pids:
            ff.removebyequality(pr.m_source, pid)
            ff.removebyequality(pr.m_target, pid)

    # set the pid to normal id dictionary
    if pid_dict is None:
        pid_dict = hlp.getuniqueparticipants(ff.getdata(), mtype='all', separate_pid_npid=True)

    # replace the message type names with the ones provided
    if message_type_conversions is not None:
        for idx in range(0, len(message_type_conversions), 2):
            message_to_convert = message_type_conversions[idx]
            to_convert_to = message_type_conversions[idx + 1]
            ff.replacebyequality(pr.m_type, message_to_convert, to_convert_to)
    message_types = ff.getuniqueelements(pr.m_type)
    coded_participant_list = pid_dict[pr.participant['all']].values()
    storage_dict = initiatestorage(coded_participant_list, message_types)
    storage_dict = getperparticipantinout(ff.getdata(), storage_dict, pid_dict)
    plotperparticipantbar(storage_dict, 'Participant ID', '# of Messages', message_types,
                          'Per Participant Messages', output_folder + output_file)
    hlp.dumpvariable(pid_dict, 'pid_dict.dict', output_folder)
    hlp.dumpvariable(ff.getdata(), 'messageData.list', output_folder)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '-O', required=True, help='Old dataset csv')
    parser.add_argument('-n', '-N', required=True, help='New dataset csv')
    parser.add_argument('-s', '-S', required=True, help='Survey file')
    parser.add_argument('-p', '-P', required=True, help='folder to store figures in, should end with /')
    parser.add_argument('-m', '-M', required=True, help='Master hash mapping csv')
    parser.add_argument('-mt', '-MT', required=True, nargs='+', help='Types of messages to look for')
    parser.add_argument('-d', '-D', action='store_true', help='Flag to debug')
    args = parser.parse_args()
    old_dataset_file = args.o
    new_dataset_file = args.n
    survey_file = args.s
    location_to_store = args.p
    master_hash_csv = args.m
    message_types = args.mt
    do_debug = args.d

    print 'Reading data...'
    master_csv = hlp.readcsv(master_hash_csv, delimiter_sym=',', remove_first=True)
    master_dict = {datum[1]: datum[0] for datum in master_csv}
    ff = filterfields()
    filtered_old = []
    filtered_new = []
    old_dataset = hlp.readcsv(old_dataset_file, delimiter_sym=',', remove_first=True)
    new_dataset = hlp.readcsv(new_dataset_file, delimiter_sym=',', remove_first=True)

    print 'Filtering message types'
    for message_type in message_types:
        filtered_old.extend(ff.filterbyequality(pr.m_type, message_type, data=old_dataset))
        filtered_new.extend(ff.filterbyequality(pr.m_type, message_type, data=new_dataset))

    wi = weeklyinfo()
    weekly_info = wi.getweeklyfo(survey_file)
    week_list = weekly_info.keys()
    week_list.sort()

    print 'Creating in out dictionary'
    in_out_message_dict = get_message_counts(filtered_old, filtered_new, week_list, weekly_info,
                                             master_dict, ff, location_to_store, do_debug)

    print 'Plotting...'
    for pid in in_out_message_dict:
        print pid
        plot_distribution(in_out_message_dict[pid][0][0], in_out_message_dict[pid][0][1],
                          in_out_message_dict[pid][1][0], in_out_message_dict[pid][1][1],
                          week_list, pid, location_to_store)
    print 'TADAA!!'
def filterweeklydata(self, pid_dict, message_list, week_info, message_type='sms'):
    participant_dict = pid_dict[pr.participant[message_type]]
    ff_obj = filterfields('')
    ff_obj.setdata(message_list)
    min_week = min(week_info.keys())
    max_week = max(week_info.keys())
    min_date = week_info[min_week][0]
    max_date = week_info[max_week][1]
    weekly_dist = {}
    for pid in participant_dict.keys():
        weekly_dist[pid] = self.__perparticipantprocessing(pid, ff_obj, curr_min=min_date, curr_max=max_date,
                                                           send_week_info=False, week_info=week_info)
    return weekly_dist
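# Illustrative only: the week_info consumed above (produced by weeklyinfo().getweeklyfo()) is
# indexed as week_no -> (start_date, end_date); the dates below are made up for the example.
def _example_week_info():
    import datetime as dt
    return {1: (dt.datetime(2014, 1, 6), dt.datetime(2014, 1, 12)),
            2: (dt.datetime(2014, 1, 13), dt.datetime(2014, 1, 19))}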
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '-F', type=str, required=True, help='filepath for message file')
    parser.add_argument('-o', '-O', type=str, required=True, help='path to store figure in')
    parser.add_argument('-mt', '-MT', type=str, nargs='*')
    parser.add_argument('-c', '-C', type=str, nargs='*')
    args = parser.parse_args()
    message_filename = args.f
    output_location = args.o
    message_types = args.mt
    combine_change = args.c

    to_combine = None if combine_change is None else []
    to_change_to = None if combine_change is None else []
    if combine_change is not None:
        for idx in range(0, len(combine_change), 2):
            to_combine.append(combine_change[idx].lower())
            to_change_to.append(combine_change[idx + 1])

    ff = filterfields(message_filename)
    ff.setdata(ff.getdata()[1:])
    message_types = ff.getuniqueelements(pr.m_type) if message_types is None else message_types
    numbers = []
    for message_type in message_types:
        numbers.append(len(ff.filterbyequality(pr.m_type, message_type)))
    if to_combine is not None:
        numbers, message_types = combineandconvert(numbers, message_types, to_combine, to_change_to)
    for idx in range(len(numbers)):
        print message_types[idx], numbers[idx]

    fig = plt.figure(figsize=[12, 12])
    ax = fig.add_subplot(111)
    # tableau20 palette, defined as 0-255 RGB triples and scaled to the 0-1 range below
    tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                 (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                 (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                 (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                 (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
    for i in range(len(tableau20)):
        r, g, b = tableau20[i]
        tableau20[i] = (r / 255., g / 255., b / 255.)
    patches, texts, autotexts = ax.pie(numbers, labeldistance=1.05, colors=tableau20,
                                       autopct='%1.1f%%', startangle=90)
    for idx in range(len(texts)):
        texts[idx].set_fontsize(20)
    for idx in range(len(autotexts)):
        autotexts[idx].set_fontsize(20)
    plt.legend(labels=message_types, loc='upper right', fontsize=20, bbox_to_anchor=(1.05, 1))
    plt.savefig(output_location, bbox_inches='tight')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '-D', required=True, help='labelled data from validate_balance_theory.py')
    parser.add_argument('-f', '-F', required=True, help='folder to save the data in')
    parser.add_argument('-w', '-W', required=False, help='survey file for weekly data processing')
    args = parser.parse_args()
    data_file = args.d
    location_to_store = args.f
    weekly_surveys = args.w

    all_data = hlp.recovervariable(data_file)
    labelled_data = all_data[2]
    pid_dict = all_data[3]
    if weekly_surveys is None:
        reciprocity_info, polarity_info = individual_reciprocity_analysis(labelled_data, pid_dict['participants'],
                                                                          location_to_store)
        analyze_info(reciprocity_info, pid_dict, location_to_store, 'pr_overall.csv')
        analyze_polarity(polarity_info, pid_dict, location_to_store, 'polarity_overall.csv')
        hlp.dumpvariable([reciprocity_info, labelled_data, pid_dict, polarity_info],
                         'reciprocity_info_overall.dict', location_to_store)
    else:
        # working with bimonthly data
        months2 = [[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23, 24, 25]]
        wi = weeklyinfo()
        weekly_info = wi.getweeklyfo(weekly_surveys)
        ff = filterfields()
        weekly_data = hlp.divideintoweekly(labelled_data, weekly_info, ff)
        idx = 1
        for bi_month in months2:
            print 'For weeks: ', bi_month
            bi_month_data = []
            for weekno in bi_month:
                bi_month_data.extend(weekly_data[weekno])
            reciprocity_info, polarity_info = individual_reciprocity_analysis(bi_month_data,
                                                                              pid_dict['participants'],
                                                                              location_to_store)
            analyze_info(reciprocity_info, pid_dict, location_to_store, 'pr_bimonthly_' + str(idx) + '.csv')
            analyze_polarity(polarity_info, pid_dict, location_to_store, 'polarity_bimonthly_' + str(idx) + '.csv')
            hlp.dumpvariable([reciprocity_info, labelled_data, pid_dict, polarity_info],
                             'reciprocity_info_bimonthly_' + str(idx) + '.data', location_to_store)
            idx += 1
    print 'tadaa!'
def main():
    parser = argparse.ArgumentParser('Script to generate distribution '
                                     'of edge weights/degrees for all '
                                     'participants')
    parser.add_argument('-m', '-M', type=str, required=True, help='location of the message file')
    parser.add_argument('-mt', '-MT', type=str, default='all',
                        help='types of messages to plot, currently supports '
                             'one of the following: sms, fb, twitter, or all')
    parser.add_argument('-r', '-R', type=str, required=True, help='survey file')
    parser.add_argument('-s', '-S', type=str, required=True, help='folder to store data in, leading / required')
    parser.add_argument('-p', '-P', action='store_true', help='flag to generate plots')
    args = parser.parse_args()
    survey_file = args.r
    message_file = args.m
    m_type = args.mt
    folder_to_store = args.s
    generate_plots = args.p

    wi = weeklyinfo()
    week_info = wi.getweeklyfo(survey_file)
    ff = filterfields(message_file)
    filtered_data = []
    if m_type == 'all':
        for message_type in ['sms', 'fb_message']:
            filtered_data.extend(ff.filterbyequality(pr.m_type, message_type))
    else:
        filtered_data = ff.filterbyequality(pr.m_type, m_type)
    _, links_tuple, _, pid_dict = hlp.creategraph(filtered_data, filterType=args.mt)
    gh = ghelper()
    plt = plots()
    weekly_deg_dist, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=args.mt,
                                                   is_degree=True, week_info=week_info)
    hlp.dumpvariable(weekly_deg_dist, 'weekly_deg_dist.dict', folder_to_store)
    weekly_ew_dist, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=args.mt,
                                                  is_degree=False, week_info=week_info)
    hlp.dumpvariable(weekly_ew_dist, 'weekly_ew_dist.dict', folder_to_store)
    if generate_plots:
        plt.plotweeklyprogression(weekly_deg_dist, folder_to_store + 'deg_', 'No. of friends',
                                  'Week No.', 'Friends')
        plt.plotweeklyprogression(weekly_ew_dist, folder_to_store + 'ew_', 'No. of messages exchanged',
                                  'Week No.', 'Messages')
    print 'done...'
def get_degree_message_count(dataset, pid_dict):
    ff = filterfields()
    ff.setdata(dataset)
    in_d = {}
    out_d = {}
    in_m = {}
    out_m = {}
    for pid in pid_dict:
        incoming_messages = ff.filterbyequality(nd.m_target, pid)
        outgoing_messages = ff.filterbyequality(nd.m_source, pid)
        people_sending_me_messages = ff.getuniqueelements(nd.m_source, data=incoming_messages)
        people_i_am_sending_messages = ff.getuniqueelements(nd.m_target, data=outgoing_messages)
        in_m[pid] = len(incoming_messages)
        out_m[pid] = len(outgoing_messages)
        in_d[pid] = len(people_sending_me_messages)
        out_d[pid] = len(people_i_am_sending_messages)
    return in_m, out_m, in_d, out_d
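# Illustrative helper (not in the original scripts): average number of incoming messages per unique
# sender, computed from the dictionaries returned by get_degree_message_count().
def _avg_incoming_messages_per_contact(in_m, in_d):
    return {pid: (float(in_m[pid]) / in_d[pid] if in_d[pid] else 0.0) for pid in in_m}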
def generate_new_dataset_dictionary(new_dataset, use_m_id=False):
    ff = filterfields()
    new_dataset_dictionary = {}
    for datum in new_dataset:
        if not use_m_id:
            src = datum[nd.m_source]
            trg = datum[nd.m_target]
            if (src, trg) not in new_dataset_dictionary:
                new_dataset_dictionary[(src, trg)] = {}
            message_type = datum[nd.m_type]
            if message_type not in new_dataset_dictionary[(src, trg)]:
                new_dataset_dictionary[(src, trg)][message_type] = {}
            create_time = datum[nd.m_timecreated]
            create_time_dt = ff.converttodate(create_time)
            if create_time_dt not in new_dataset_dictionary[(src, trg)][message_type]:
                new_dataset_dictionary[(src, trg)][message_type][create_time_dt] = []
            new_dataset_dictionary[(src, trg)][message_type][create_time_dt].append(datum)
        else:
            new_dataset_dictionary[datum[nd.msg_id]] = datum
    return new_dataset_dictionary
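# Illustrative helper (not in the original scripts): the dictionary built above is indexed as
# (source, target) -> message type -> creation datetime -> [rows], so counting the messages
# between a pair reduces to summing the per-timestamp lists.
def _count_messages_between(new_dataset_dictionary, src, trg, message_type):
    per_time = new_dataset_dictionary.get((src, trg), {}).get(message_type, {})
    return sum(len(rows) for rows in per_time.values())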
def getweeklydistributions(self, pid_dict, message_list, message_type='sms', is_degree=True, week_info=None):
    # is_degree = F --> edge weight distribution
    # remember whether week info was supplied before defaulting, so the fallback
    # date computation and per-participant week derivation can still run
    week_info_provided = week_info is not None
    week_info = {} if week_info is None else week_info
    participant_dict = pid_dict[pr.participant[message_type]]
    ff_obj = filterfields('')
    ff_obj.setdata(message_list)
    if week_info_provided:
        min_week = min(week_info.keys())
        max_week = max(week_info.keys())
        min_date = week_info[min_week][0]
        max_date = week_info[max_week][1]
    else:
        min_date, max_date = self.getminmaxdates(message_list, ff_obj)
    weekly_dist = {}
    for pid in participant_dict.keys():
        weekly_dist[pid] = {}
        if not week_info_provided:
            weekly_dict, temp_week_info = self.__perparticipantprocessing(pid, ff_obj, curr_min=min_date,
                                                                          curr_max=max_date, send_week_info=True)
            week_info[pid] = temp_week_info
        else:
            weekly_dict = self.__perparticipantprocessing(pid, ff_obj, curr_min=min_date, curr_max=max_date,
                                                          send_week_info=False, week_info=week_info)
        weekly_graphs = self.__weeklygraphs(weekly_dict, pid_dict, message_type=message_type)
        for weekno in weekly_graphs.keys():
            go = weekly_graphs[weekno]
            if is_degree:
                g_info = go.getdegrees(participant_dict[pid])
            else:
                g_info_ew = go.getedgeweights(participant_dict[pid])
                in_w = 0
                out_w = 0
                for e_tuple in g_info_ew[0]:
                    in_w += e_tuple[2]['weight']
                for e_tuple in g_info_ew[1]:
                    out_w += e_tuple[2]['weight']
                g_info = [in_w, out_w]
            weekly_dist[pid][weekno] = g_info
    return weekly_dist, week_info
def get_count_degrees_messages_directed(labelled_data, pid_dict):
    fobj = filterfields()
    fobj.setdata(labelled_data)
    in_m = []
    out_m = []
    in_d = []
    out_d = []
    pid_order = []
    for pid in pid_dict:
        in_data = fobj.filterbyequality(pr.m_target, pid)
        out_data = fobj.filterbyequality(pr.m_source, pid)
        in_m.append(len(in_data))
        out_m.append(len(out_data))
        people_sending_me_messages = fobj.getuniqueelements(pr.m_source, data=in_data)
        people_i_am_sending_messages_to = fobj.getuniqueelements(pr.m_target, data=out_data)
        in_d.append(len(people_sending_me_messages))
        out_d.append(len(people_i_am_sending_messages_to))
        pid_order.append(pid)
    return in_m, out_m, in_d, out_d
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '-D', required=True, help='labelled data from validate_balance_theory.py')
    parser.add_argument('-f', '-F', required=True, help='folder to save the data in')
    parser.add_argument('-w', '-W', required=False, help='survey file for weekly data processing')
    args = parser.parse_args()
    data_file = args.d
    location_to_store = args.f
    weekly_surveys = args.w

    all_data = hlp.recovervariable(data_file)
    labelled_data = all_data[2]
    pid_dict = all_data[3]
    if weekly_surveys is None:
        reciprocity_dict, message_pairs = find_reciprocity(labelled_data, location_to_store)
        hlp.dumpvariable([reciprocity_dict, message_pairs], 'reciprocity_counts_msgPairs_overall',
                         location_to_store)
    else:
        months2 = [[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23, 24, 25]]
        wi = weeklyinfo()
        weekly_info = wi.getweeklyfo(weekly_surveys)
        ff = filterfields()
        weekly_data = hlp.divideintoweekly(labelled_data, weekly_info, ff)
        idx = 1
        for bi_month in months2:
            print 'For weeks: ', bi_month
            bi_month_data = []
            for weekno in bi_month:
                bi_month_data.extend(weekly_data[weekno])
            reciprocity_dict, message_pairs = find_reciprocity(bi_month_data, location_to_store)
            hlp.dumpvariable([reciprocity_dict, message_pairs],
                             'reciprocity_counts_msgPairs_bimonthly_' + str(idx) + '.data', location_to_store)
            # advance the bimonthly index so each period gets its own output file
            idx += 1
def main():
    ff = filterfields(sys.argv[1])
    print 'filtering...'
    filtered_data = ff.filterbyequality(pr.m_type, sys.argv[6])
    hlp.dumpvariable(filtered_data, 'filtered_' + sys.argv[6], sys.argv[5])
    print 'done'
    # '-' as a positional argument means "skip this output"; compare by value, not identity
    if '-' != sys.argv[2]:
        writecsv(sys.argv[2], filtered_data)
    if '-' != sys.argv[3]:
        links, link_tuple, graph_obj, pid_dict = hlp.creategraph(filtered_data)
        hlp.dumpvariable(links, 'static_links', sys.argv[5])
        hlp.dumpvariable(link_tuple, 'static_links_tuple', sys.argv[5])
        hlp.dumpvariable(graph_obj, 'static_graph_obj', sys.argv[5])
        hlp.dumpvariable(pid_dict, 'pid_dict', sys.argv[5])
        graph_obj.writegraph(sys.argv[3])
    if '-' != sys.argv[4]:
        to_write_edge, to_write_nodes, week_dict, pid_dict, week_content = hlp.creategraph(filtered_data, False)
        writetofile(sys.argv[4] + '_el.csv', to_write_edge)
        writetofile(sys.argv[4] + '_nl.csv', to_write_nodes)
        hlp.dumpvariable(week_dict, 'dynamic_week_dict', sys.argv[5])
        hlp.dumpvariable(pid_dict, 'pid_dict', sys.argv[5])
        hlp.dumpvariable(week_content, 'week_content', sys.argv[5])
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '-M', required=True, help='Sentiment Message file')
    parser.add_argument('-t', '-T', action='store_true', help='Sentiment type flag, if used then vader, else afinn')
    parser.add_argument('-f', '-F', required=True, help='Folder to store checkpoints, and final result')
    parser.add_argument('-w', '-W', required=False, help='Per week/month analysis')
    args = parser.parse_args()
    message_file = args.m
    sentiment_type = args.t
    location_to_store = args.f
    survey_file = args.w

    # get message data, only sms and fb_message
    ff = filterfields(message_file)
    ff.setdata(ff.getdata()[1:])
    sms_data = ff.filterbyequality(pr.m_type, 'sms')
    pid_dict_sms = hlp.getuniqueparticipants2(sms_data)
    fb_message_data = ff.filterbyequality(pr.m_type, 'fb_message')
    pid_dict_fb = hlp.getuniqueparticipants2(fb_message_data)
    message_data = sms_data + fb_message_data

    # put the labels on
    labelled_data = hlp.processvadersentiment(message_data, label_only=False) if sentiment_type else \
        hlp.processafinnsentiment(message_data, label_only=False)
    if survey_file is not None:
        wi = weeklyinfo()
        weekly_info = wi.getweeklyfo(survey_file)
        weekly_data = hlp.divideintoweekly(labelled_data, weekly_info, ff)
        # __temp_testing_for_discrepancy(labelled_data, weekly_data)

    # get the pid_dict for easier handling
    pid_dict = hlp.getuniqueparticipants2(labelled_data)
    if survey_file is not None:
        over_sent, in_sent, out_sent, xtick, ytick = per_participant_sentiment(weekly_data, pid_dict['participants'])
        __plot_imshow(over_sent, 'Participant', 'Week #', xtick, ytick, location_to_store + 'sent_imshow_over.pdf')
        __plot_imshow(in_sent, 'Participant', 'Week #', xtick, ytick, location_to_store + 'sent_imshow_in.pdf')
        __plot_imshow(out_sent, 'Participant', 'Week #', xtick, ytick, location_to_store + 'sent_imshow_out.pdf')

    print '***SMS***'
    print 'P: ', len(pid_dict_sms['participants'].values()), ' NP: ', len(pid_dict_sms['nonparticipants'].values())
    print '***FB***'
    print 'P: ', len(pid_dict_fb['participants'].values()), 'NP: ', len(pid_dict_fb['nonparticipants'].values())
    print '***OVERALL***'
    print 'P: ', len(pid_dict['participants'].values()), 'NP: ', len(pid_dict['nonparticipants'].values())

    summary_src_trg = summarize_message_by_src_trg(labelled_data)
    print '***Message Distribution***'
    for m_type_1 in summary_src_trg:
        print m_type_1, summary_src_trg[m_type_1]

    if survey_file is not None:
        week_list = weekly_data.keys()
        week_list.sort()
        # this is not good, as there aren't enough triads
        months = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16], [17, 18, 19, 20],
                  [21, 22, 23, 24, 25]]
        # this has at least 8 triads, always, use this
        months2 = [[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23, 24, 25]]
        month_idx = 1
        for month in months2:
            labelled_data = []
            for week in month:
                labelled_data.extend(weekly_data[week])
            general_graph, random_graph = conduct_triad_analysis(labelled_data, pid_dict)
            frac_triad = general_graph[3]
            summary_triad = general_graph[2]
            frac_triad_rand = random_graph[3]
            summary_triad_rand = random_graph[2]
            print '** Months ', 2*month_idx-1, 2*month_idx, ': ', month, ' ***'
            print 'len(LD): ', len(labelled_data)
            for summary in frac_triad:
                print summary, 'Study: ', frac_triad[summary], '(', len(summary_triad[summary]), ')', ' Random: ', \
                    frac_triad_rand[summary], '(', len(summary_triad_rand[summary]), ')'
            words_list, short_list = word_count(labelled_data)
            toWrite_wl_csv = create_word_count_csv(words_list)
            hlp.writecsv(toWrite_wl_csv,
                         location_to_store+'word_list_'+str(2*month_idx-1)+'-'+str(2*month_idx)+'.csv',
                         delimiter_sym=',')
            for mtype in words_list:
                counted_words = Counter(words_list[mtype])
                counted_short = Counter(short_list[mtype])
                print '***For '+mtype+' ***'
                print 'Top 20 words: ', __get_top_word_sentiment(counted_words.most_common(20))
                print 'Top 20 short: ', counted_short.most_common(20)
                print '\n\n'
            hlp.dumpvariable([general_graph, random_graph, labelled_data, pid_dict],
                             'month_'+str(month_idx)+'.list', location_to_store)
            month_idx += 1
    else:
        print 'len(LD): ', len(labelled_data)
        words_list, short_list = word_count(labelled_data)
        toWrite_wl_csv = create_word_count_csv(words_list)
        hlp.writecsv(toWrite_wl_csv, location_to_store+'word_list.csv', delimiter_sym=',')
        for mtype in words_list:
            counted_words = Counter(words_list[mtype])
            counted_short = Counter(short_list[mtype])
            print '***For '+mtype+' ***'
            print 'Top 20 words: ', __get_top_word_sentiment(counted_words.most_common(20))
            print 'Top 20 short: ', counted_short.most_common(20)
            print '\n\n'
        general_graph, random_graph = conduct_triad_analysis(labelled_data, pid_dict)
        frac_triad = general_graph[3]
        summary_triad = general_graph[2]
        frac_triad_rand = random_graph[3]
        summary_triad_rand = random_graph[2]
        for summary in frac_triad:
            print summary, 'Study: ', frac_triad[summary], '(', len(summary_triad[summary]), ')', ' Random: ', \
                frac_triad_rand[summary], '(', len(summary_triad_rand[summary]), ')'
        hlp.dumpvariable([general_graph, random_graph, labelled_data, pid_dict], 'Overall.list', location_to_store)
        # plot_degree_dist(general_graph[4], 'Degree(d)', '# of Participants with Degree d')

    pos, neg, neu = get_polarity_directionality(labelled_data)
    print '***Polarity Distribution***'
    print 'Positive: \n', pos
    print 'Negative: \n', neg
    print 'Neutral: \n', neu

    in_m, out_m, in_d, out_d = get_count_degrees_messages_directed(labelled_data, pid_dict['participants'])
    print '***Incoming Messages***'
    print 'Total: ', sum(in_m), 'Mean: ', np.mean(in_m), 'Std. dev.: ', np.std(in_m)
    print '***Outgoing Messages***'
    print 'Total: ', sum(out_m), 'Mean: ', np.mean(out_m), 'Std. dev.: ', np.std(out_m)
    print '***In Degree***'
    print 'Total: ', sum(in_d), 'Mean: ', np.mean(in_d), 'Std. dev.: ', np.std(in_d)
    print '***Out Degree***'
    print 'Total: ', sum(out_d), 'Mean: ', np.mean(out_d), 'Std. dev.: ', np.std(out_d)
    print '***COUNTS***'

    plot_messages_degree([in_m, out_m], '# of Messages', 'Cumulative Participant Prob.',
                         location_to_store+'in_out_messages.pdf')
    # plot_messages_degree(out_m, '# of Outgoing Messages', 'Cumulative Participant Prob.',
    #                      location_to_store+'out_messages.pdf')
    plot_messages_degree([in_d, out_d], 'Degree', 'Cumulative Participant Prob.',
                         location_to_store+'in_out_degree.pdf', True)
    # plot_messages_degree(out_d, 'Out Degree', 'Cumulative Participant Prob.',
    #                      location_to_store+'out_degree.pdf', True)
    print 'TADAA!!'
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '-M', type=str, required=True, help='Message list file')
    parser.add_argument('-r', '-R', type=str, required=True, help='survey file')
    parser.add_argument('-p', '-P', type=str, required=True, help='PID dict inverted')
    parser.add_argument('-b', '-B', type=str, required=True, help='bullying dictionary')
    parser.add_argument('-o', '-O', type=str, required=True, help='Output folder')
    parser.add_argument('-l', '-L', type=str, nargs='+', help='Filters chosen')
    parser.add_argument('-f', '-f', type=str, nargs='+', help='Filter files')
    args = parser.parse_args()
    output_folder = args.o
    message_data = hlp.recovervariable(args.m)
    pid_dict = hlp.recovervariable(args.p)
    filters_chosen = args.l
    filter_files = args.f

    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)
    wi = weeklyinfo()
    weekly_info = wi.getweeklyfo(args.r)
    ff = filterfields()
    gh = ghelper()
    bullying_overlay = gh.createbullyingoverlay(catch_all_data, weekly_info, ff)
    bullying_overlay = flip_bullying_overlay(bullying_overlay, weekly_info.keys())

    pid_list = pid_dict.keys()
    pid_list.sort()
    for pid in pid_list:
        training_set_final = []
        testing_set_final = []
        pid_list_training = deepcopy(pid_list)
        pid_list_training.remove(pid)
        ff.setdata(message_data)
        testing_raw_data = ff.filterbyequality(pr.m_source, pid_dict[pid]) + \
                           ff.filterbyequality(pr.m_target, pid_dict[pid])
        ff.removebyequality(pr.m_source, pid_dict[pid])
        ff.removebyequality(pr.m_target, pid_dict[pid])
        training_raw_data = ff.getdata()
        fe = raw_features(data=None)
        _, _ = fe.get_scoring_factors(training_raw_data)

        training_weekly_data = {}
        for training_pid in pid_list_training:
            training_weekly_data[training_pid] = {}
            data_to_use = ff.filterbyequality(pr.m_source, pid_dict[training_pid]) + \
                          ff.filterbyequality(pr.m_target, pid_dict[training_pid])
            if 0 == len(data_to_use):
                print 'no data found, probably filtered into the testing set, Training PID: ' + \
                      training_pid + ', Testing PID: ' + pid
                continue
            pid_weekly_w_bullying, global_in_degree, global_out_degree, global_in_ew, global_out_ew, incoming_ss, \
                outgoing_ss = get_pid_level_features(data_to_use, weekly_info, ff, bullying_overlay, pid_dict,
                                                     training_pid, fe)
            for week_no in pid_weekly_w_bullying:
                fr_in_degree, fr_out_degree, fr_in_ew, \
                    fr_out_ew, fr_in_senti, fr_out_senti, \
                    current_in_ss, current_out_ss = get_week_features(pid_weekly_w_bullying, week_no, fe,
                                                                      global_in_degree, global_out_degree,
                                                                      global_in_ew, global_out_ew,
                                                                      incoming_ss, outgoing_ss,
                                                                      pid_dict[training_pid])
                training_set_final.append(
                    [training_pid, week_no,
                     fr_in_senti[0], fr_in_senti[1], fr_in_senti[2],
                     fr_out_senti[0], fr_out_senti[1], fr_out_senti[2],
                     fr_in_degree, fr_out_degree, fr_in_ew, fr_out_ew,
                     current_in_ss, current_out_ss,
                     pid_weekly_w_bullying[week_no]['label']])

        # testing pid
        pid_weekly_w_bullying, global_in_degree, global_out_degree, \
            global_in_ew, global_out_ew, incoming_ss, outgoing_ss = get_pid_level_features(testing_raw_data,
                                                                                           weekly_info, ff,
                                                                                           bullying_overlay,
                                                                                           pid_dict, pid, fe)
        for week_no in pid_weekly_w_bullying:
            fr_in_degree, fr_out_degree, fr_in_ew, \
                fr_out_ew, fr_in_senti, fr_out_senti, \
                current_in_ss, current_out_ss = get_week_features(pid_weekly_w_bullying, week_no, fe,
                                                                  global_in_degree, global_out_degree,
                                                                  global_in_ew, global_out_ew,
                                                                  incoming_ss, outgoing_ss, pid_dict[pid])
            testing_set_final.append(
                [pid, week_no,
                 fr_in_senti[0], fr_in_senti[1], fr_in_senti[2],
                 fr_out_senti[0], fr_out_senti[1], fr_out_senti[2],
                 fr_in_degree, fr_out_degree, fr_in_ew, fr_out_ew,
                 current_in_ss, current_out_ss,
                 pid_weekly_w_bullying[week_no]['label']])

        header = ['pid', 'wkno', 'frWInSenPos', 'frWInSenNeu', 'frWInSenNeg',
                  'frWOutSenPos', 'frWOutSenNeu', 'frWOutSenNeg',
                  'frInDegO', 'frOutDegO', 'frInEdgeO', 'frOutEdgeO',
                  'inSenSc', 'outSenSc', 'label']
        training_set_final = [header] + training_set_final
        testing_set_final = [header] + testing_set_final
        hlp.writecsv(training_set_final, output_folder + pid + '_tr.csv')
        hlp.writecsv(testing_set_final, output_folder + pid + '_ts.csv')
from sentimentanalysis import sentiment
from filterByField import filterfields
from basicInfo import twitterdataset as td
from basicInfo import privateInfo as pr
import helper as hlp
import random

data = hlp.readcsv('../ignore_data/Sentiment_Twitter.csv')
data = data[1:]
ff = filterfields('../ignore_data/messages.csv')
smsdata = ff.filterbyequality(pr.m_type, 'sms')
k = len(data)
l = len(smsdata)
seed = 254
random.seed(seed)
tr_n = 1000000
ts_n = 30
idx = 0
tr_before = []
ts_before = []
while idx < tr_n:
    i = random.randint(0, k - 1)  # randint is inclusive at both ends, so stay inside the list
    datum = data[i]
    tweet_type = td.sentiment_dict[datum[td.sentiment]]
    tweet_content = datum[td.sentiment_text]
    tr_before.append((tweet_content, tweet_type))
    idx += 1
random.seed(seed)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '-O', help='Old Dataset', required=True)
    parser.add_argument('-n', '-N', help='New Dataset', required=True)
    parser.add_argument('-f', '-F', help='Folder to store results in, ending with /', required=True)
    parser.add_argument('-p', '-P', help='text file with list of people who were ordered to be removed',
                        required=True)
    parser.add_argument('-s', '-S', help='text file with list of people who were semi-consented', required=True)
    args = parser.parse_args()
    old_dataset_file = args.o
    new_dataset_file = args.n
    location_to_store = args.f
    ordered_removed_file = args.p
    semi_consented_file = args.s

    print '***Reading data from arguments...'
    old_dataset = hlp.readcsv(old_dataset_file, delimiter_sym=',', remove_first=True)
    new_dataset = hlp.readcsv(new_dataset_file, delimiter_sym=',')
    new_dataset_dictionary = generate_new_dataset_dictionary(new_dataset[1:])
    new_dataset_msg_id_dictionary = generate_new_dataset_dictionary(new_dataset[1:], use_m_id=True)
    with open(ordered_removed_file, 'r') as f:
        ordered_removed = eval(f.read())
    with open(semi_consented_file, 'r') as f:
        semi_consented = eval(f.read())

    print '***Filtering old data within dates of study...'
    ff = filterfields()
    old_dataset_within_dates = ff.filterbetweendates(ff.converttodate(pr.start_datetime),
                                                     ff.converttodate(pr.end_datetime),
                                                     data_to_work=old_dataset, right_equality=True,
                                                     date_field=pr.m_time_sent)
    old_dataset = old_dataset_within_dates
    old_dataset_counts = {}
    for datum in old_dataset:
        m_type = datum[pr.m_type]
        if m_type not in old_dataset_counts:
            old_dataset_counts[m_type] = 0
        old_dataset_counts[m_type] += 1
    print '*** OLD DATASET COUNTS***', old_dataset_counts

    print '***Finding mapping...'
    mapping_dict = {}
    inverted_mapping_dict = {}
    missed_dict = {}
    no_reason = []
    counts_no_match = {'ord': {'sms': 0, 'fb_message': 0, 'twitter_status': 0, 'twitter_message': 0,
                               'fb_activity': 0, 'fb_like': 0, 'fb_comment': 0},
                       'semi': {'sms': 0, 'fb_message': 0, 'twitter_status': 0, 'twitter_message': 0,
                                'fb_activity': 0, 'fb_like': 0, 'fb_comment': 0},
                       'no': {'sms': 0, 'fb_message': 0, 'twitter_status': 0, 'twitter_message': 0,
                              'fb_activity': 0, 'fb_like': 0, 'fb_comment': 0}}
    counts_match = {'sms': 0, 'fb_message': 0, 'twitter_status': 0, 'twitter_message': 0, 'fb_activity': 0,
                    'fb_like': 0, 'fb_comment': 0}
    no_reason_counts = {}
    for datum in old_dataset:
        m_result, msg_val = message_exists(datum, new_dataset_dictionary, ff)
        if m_result:
            mapping_dict[datum[pr.msg_id]] = msg_val
            if msg_val[1] not in inverted_mapping_dict:
                inverted_mapping_dict[msg_val[1]] = []
            inverted_mapping_dict[msg_val[1]].append(datum[pr.msg_id])
            m_type = datum[pr.m_type]
            if m_type in counts_match:
                counts_match[m_type] += 1
        else:
            src = datum[pr.m_source]
            trg = datum[pr.m_target]
            m_type = datum[pr.m_type]
            if src in ordered_removed or trg in ordered_removed:
                reason = 'ordered removed'
                if m_type in counts_no_match['ord']:
                    counts_no_match['ord'][m_type] += 1
            elif src in semi_consented or trg in semi_consented:
                reason = 'semi consented'
                if m_type in counts_no_match['semi']:
                    counts_no_match['semi'][m_type] += 1
            else:
                reason = ''
                temp = datum
                temp.append(msg_val)
                no_reason.append(temp)
                if m_type in counts_no_match['no']:
                    counts_no_match['no'][m_type] += 1
                if m_type not in no_reason_counts.keys():
                    no_reason_counts[m_type] = {}
                if msg_val not in no_reason_counts[m_type].keys():
                    no_reason_counts[m_type][msg_val] = 0
                no_reason_counts[m_type][msg_val] += 1
            missed_dict[datum[pr.msg_id]] = [msg_val, datum[pr.m_type], reason]

    print '\n\n**NOT FOUND**'
    for key_v in counts_no_match.keys():
        print key_v
        print counts_no_match[key_v]
    print '\n\n**NO REASON**'
    for key_v in no_reason_counts.keys():
        print key_v
        print no_reason_counts[key_v]
    print '\n\n**FOUND**', counts_match

    print '***Creating new dataset with mappings...'
    new_dataset_header = new_dataset[0]
    new_dataset_header.extend(['Old Message IDs'])
    final_dataset = [new_dataset_header]
    for new_msg_id in new_dataset_msg_id_dictionary.keys():
        datum = new_dataset_msg_id_dictionary[new_msg_id]
        old_msg_id = [''] if new_msg_id not in inverted_mapping_dict else inverted_mapping_dict[new_msg_id]
        datum.extend(old_msg_id)
        final_dataset.append(datum)

    print '***Writing data...'
    hlp.writecsv(final_dataset, location_to_store + 'new_old_mapped_hashed_dataset.csv', delimiter_sym=',')
    mapping_dict_list = [[x, mapping_dict[x][0], mapping_dict[x][1]] for x in mapping_dict]
    mapping_header = [['old_id', 'cosine_val', 'new_id']]
    mapping_header.extend(mapping_dict_list)
    hlp.writecsv(mapping_header, location_to_store + 'old_to_new_mapping.csv', delimiter_sym=',')
    missed_dict_list = [[x, missed_dict[x][0], missed_dict[x][1], missed_dict[x][2]] for x in missed_dict]
    missed_header = [['old_id', 'Reason', 'm_type', 'Explanation']]
    missed_header.extend(missed_dict_list)
    hlp.writecsv(missed_header, location_to_store + 'old_not_found.csv', delimiter_sym=',')
    hlp.writecsv(no_reason, location_to_store + 'old_not_found_no_reason.csv', delimiter_sym=',')
    print 'TADAA!!!'
def main():
    parser = argparse.ArgumentParser('Script to perform sentiment analysis using VADER')
    parser.add_argument('-m', '-M', type=str, required=True, help='Location of the message file')
    parser.add_argument('-mt', '-MT', type=str, required=True, nargs='+', help='types of messages to filter')
    parser.add_argument('-f', '-F', type=str, required=True,
                        help='filename where data is stored, no extension needed')
    parser.add_argument('-s', '-S', type=str, required=True,
                        help='location of folder to store the file, ends with a /')
    parser.add_argument('-p', '-P', action='store_true', help='flag to store polarities separately')
    parser.add_argument('-w', '-W', type=str, required=False,
                        help='conduct weekly analysis, path to the survey data for creating week information')
    parser.add_argument('-l', '-L', type=str, nargs='+', required=True,
                        help='the filters to use, make one or more choices: seenB, wasB, didB')
    parser.add_argument('-lf', '-LF', type=str, nargs='+', required=True,
                        help='location of filtered data, from runSurveyStats.py, in same order as -l/L flag')
    args = parser.parse_args()
    message_file = args.m
    message_types = args.mt
    filename_to_store = args.f
    location_to_store = args.s
    separate_polarity_score = args.p
    survey_file = args.w
    filters_chosen = args.l
    filter_files = args.lf

    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)
    if separate_polarity_score and survey_file is not None:
        print 'Cannot have separate polarity scores and weekly analysis together, ' \
              'please remove the -p/-P flag'
        return
    if survey_file is not None:
        wi = weeklyinfo()
        week_dates = wi.getweeklyfo(survey_file)
        gh = ghelper()
    ff = filterfields(message_file)
    data = []
    for message_type in message_types:
        data.extend(ff.filterbyequality(pr.m_type, message_type))
    pid_dict = hlp.getuniqueparticipants(data, 'all' if len(message_types) > 1 else message_types[0])
    sentiment_analyzer = vadersenti(data[1:])
    returned_data = sentiment_analyzer.compilesentiment(pr.m_content,
                                                        separate_sentiment_list=separate_polarity_score)
    if separate_polarity_score:
        hlp.dumpvariable(returned_data, filename_to_store + '.data', location_to_store)
    else:
        header = pr.message_header + ['pos', 'neg', 'neu', 'compound']
        final_data = [header] + returned_data
        hlp.writecsv(final_data, location_to_store + filename_to_store + '.csv')
        weekly_data = gh.filterweeklydata(pid_dict, returned_data, week_dates,
                                          'all' if len(message_types) > 1 else message_types[0])
        hlp.dumpvariable(weekly_data, 'weekly_data.dict', location_to_store)
        summarized_sentiment = {}
        for pid in weekly_data:
            summarized_sentiment[pid] = {}
            participant_data = weekly_data[pid]
            for week_no in participant_data:
                summarized_sentiment[pid][week_no] = sentiment_analyzer.summarizesentiment(participant_data[week_no],
                                                                                           separate_in_out=True,
                                                                                           message_type=message_type)
        hlp.dumpvariable(summarized_sentiment, 'weekly_summarized_sentiment.dict', location_to_store)
        plt = plots()
        overlay_data = gh.createbullyingoverlay(catch_all_data, week_dates, ff)
        plt.plotweeklyprogression(summarized_sentiment, location_to_store, 'Sentiment Progress', 'Week',
                                  'Sentiment Value', sentiment_legend=['Positive', 'Negative', 'Neutral'],
                                  overlay_data=overlay_data)
    print 'done'
def main():
    parser = argparse.ArgumentParser('Script to generate a CDF comparing the degrees of our participants')
    parser.add_argument('-l', '-L', type=str, nargs='+', required=True,
                        help='the filters to use, make one or more choices: seenB, wasB, didB')
    parser.add_argument('-f', '-F', type=str, nargs='+', required=True,
                        help='location of filtered data, from runSurveyStats.py, in the same order as -l/L flag')
    parser.add_argument('-m', '-M', type=str, required=True, help='location of the message file')
    parser.add_argument('-mt', '-MT', type=str, default='sms',
                        help='type of message we are filtering, default: sms')
    parser.add_argument('-n', '-N', action='store_true',
                        help='flag indicates that processing should include participants which did not witness '
                             'anything mentioned in the values passed for flags -l/L')
    parser.add_argument('-a', '-A', action='store_true',
                        help='flag indicates that processing should include a plot of all participants')
    parser.add_argument('-s', '-S', type=str, required=True, help='folder to store in, leading /')
    parser.add_argument('-r', '-R', type=str, required=True, help='survey file')
    args = parser.parse_args()
    filters_chosen = args.l
    for filter_v in filters_chosen:
        if filter_v not in ['seenB', 'didB', 'wasB']:
            raise Exception('filter value was not from the ones specified')
    filter_files = args.f
    assert len(filter_files) == len(filters_chosen), e.len_filter_file_ne_len_filters_chosen
    include_other_participants = args.n
    include_all_participants = args.a
    location_to_store = args.s
    if not os.path.exists(location_to_store):
        os.mkdir(location_to_store)
    message_file = args.m
    message_type = args.mt
    survey_file = args.r

    wi = weeklyinfo()
    week_info = wi.getweeklyfo(survey_file)
    gh = ghelper()
    plt = plots()

    # get the filtered messages
    ff = filterfields(message_file)
    filtered_data = []
    if message_type == 'all':
        for message_type in ['sms', 'fb', 'twitter']:
            filtered_data.extend(ff.filterbyequality(pr.m_type, message_type))
    else:
        filtered_data = ff.filterbyequality(pr.m_type, message_type)

    # generate the links and the graph for the filtered data
    links, links_tuple, graph_obj, pid_dict = hlp.creategraph(filtered_data, filterType=message_type)

    # get the pids from the chosen filters
    bullying_pid_dict = hlp.getfilterdata(filters_chosen, filter_files)
    cumulative_bully_pid = hlp.getfilterdata(filters_chosen, filter_files, cumulative_list=True)

    # get all the information from the filters
    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)

    # generate the distributions for in degree and plot them
    in_distributions = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                include_other_participants, pid_dict, message_type,
                                                cumulative_bully_pid, in_dist=True)
    in_distributions_ew = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                   include_other_participants, pid_dict, message_type,
                                                   cumulative_bully_pid, in_dist=True, is_degree=False)
    plt.generatetablehist(in_distributions, location_to_store + 'in_degree_table.csv', generate_totals=True)
    plt.generatetablehist(in_distributions_ew, location_to_store + 'in_edge_weight.csv', generate_totals=True)

    # generate the distributions for out degree and plot them
    out_distributions = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                 include_other_participants, pid_dict, message_type,
                                                 cumulative_bully_pid, in_dist=False)
    # edge-weight variant of the out distribution, mirroring the in_distributions_ew call above
    out_distributions_ew = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                    include_other_participants, pid_dict, message_type,
                                                    cumulative_bully_pid, in_dist=False, is_degree=False)
    plt.generatetablehist(out_distributions, location_to_store + 'out_degree_table.csv', generate_totals=True)
    plt.generatetablehist(out_distributions_ew, location_to_store + 'out_edge_weight.csv', generate_totals=True)

    # line plot of degrees
    weekly_dist_degrees, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=message_type,
                                                       is_degree=True, week_info=week_info)
    overlay_info = gh.createbullyingoverlay(catch_all_data, week_info, ff)
    plt.plotweeklyprogression(weekly_dist_degrees, location_to_store + 'deg_', 'No of friends', 'Week No',
                              'Friends', overlay_data=overlay_info)

    # line plot of weights
    weekly_dist_ew, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=message_type,
                                                  is_degree=False, week_info=week_info)
    overlay_info = gh.createbullyingoverlay(catch_all_data, week_info, ff)
    plt.plotweeklyprogression(weekly_dist_ew, location_to_store + 'ew_', 'No. of messages exchanged', 'Week No',
                              'Messages', overlay_data=overlay_info)
    print 'TADAAA!'