def getstats(filepath, participant_dict, p_type, message_type='sms'): ff = filterfields(filepath) ff.setdata(ff.filterbyequality(pr.m_type, message_type)) participant_stats = {} for participant_id in participant_dict: survey_no_list = participant_dict[participant_id] p_data = ff.filterbyequality(pr.m_source, participant_id) + \ ff.filterbyequality(pr.m_target, participant_id) if [] == p_data: print 'no data exists for pid: ' + participant_id continue pid_dict = hlp.getuniqueparticipants(p_data) for survey_no in survey_no_list: print 'Participant no.', participant_id, ' S.no.: ', survey_no idx = survey_no survey_no = survey_no_list[survey_no][0] end_date = ff.converttodate(survey_no[sr.s_time]) start_date = end_date - dt.timedelta(days=7) data_between_dates = ff.filterbetweendates(start_date, end_date, data_to_work=p_data) original_start_date = ff.converttodate(pr.start_datetime) data_start_to_date = ff.filterbetweendates(original_start_date, start_date, data_to_work=p_data) between_stats, before_stats = graphstats(data_start_to_date, data_between_dates, participant_id, p_type, original_start_date, start_date, ff, pid_dict) temp_dict = {'between': between_stats, 'before': before_stats, 'pid_dict': pid_dict} participant_stats[participant_id] = {idx: temp_dict} return participant_stats
def main():
    """CLI entry point: filter messages by type, optionally drop ignored
    participants and rename message types, then count and plot the number
    of messages sent/received per participant, dumping the intermediate
    data alongside the plot."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--messageFile', type=str, required=True)
    # BUG FIX: -mt was optional but iterated unconditionally below,
    # crashing with TypeError when omitted; make it required.
    parser.add_argument('-mt', '--messageTypes', type=str, nargs='+',
                        required=True)
    parser.add_argument('-o', '--outputFolder', type=str, required=True)
    parser.add_argument('-of', '--outputFile', type=str, required=True)
    parser.add_argument('-pd', '--participantDictionary', type=str)
    parser.add_argument('-i', '--ignoreParticipants', type=str)
    parser.add_argument('-mc', '--messageTypeConvert', type=str, nargs='*')
    args = parser.parse_args()

    message_file = args.messageFile
    message_types = args.messageTypes
    output_folder = args.outputFolder
    output_file = args.outputFile
    pid_dict = args.participantDictionary
    ignore_pids = args.ignoreParticipants
    message_type_conversions = args.messageTypeConvert

    ff = filterfields(message_file)
    ff.setdata(ff.getdata()[1:])  # drop the header row

    # extract the relevant data: keep only the requested message types
    to_set_data = []
    for message_type in message_types:
        to_set_data.extend(ff.filterbyequality(pr.m_type, message_type))
    ff.setdata(to_set_data)

    # remove every message sent or received by an ignored participant
    if ignore_pids is not None:
        ignore_pids = hlp.recovervariable(ignore_pids)
        for pid in ignore_pids:
            ff.removebyequality(pr.m_source, pid)
            ff.removebyequality(pr.m_target, pid)

    # set the pid to normal id dictionary
    if pid_dict is None:
        pid_dict = hlp.getuniqueparticipants(ff.getdata(), mtype='all',
                                             separate_pid_npid=True)
    else:
        # BUG FIX: the -pd argument is a file path; the original used the
        # raw path string as the dictionary, which fails when it is indexed
        # below. Recover it the same way ignoreParticipants is recovered.
        pid_dict = hlp.recovervariable(pid_dict)

    # replace the message type names with the ones provided, consumed as
    # (from, to) pairs
    if message_type_conversions is not None:
        # BUG FIX: stop before a trailing unpaired name instead of raising
        # IndexError on idx+1 for odd-length input.
        for idx in range(0, len(message_type_conversions) - 1, 2):
            message_to_convert = message_type_conversions[idx]
            to_convert_to = message_type_conversions[idx + 1]
            ff.replacebyequality(pr.m_type, message_to_convert, to_convert_to)

    message_types = ff.getuniqueelements(pr.m_type)
    coded_participant_list = pid_dict[pr.participant['all']].values()
    storage_dict = initiatestorage(coded_participant_list, message_types)
    storage_dict = getperparticipantinout(ff.getdata(), storage_dict, pid_dict)
    plotperparticipantbar(storage_dict, 'Participant ID', '# of Messages',
                          message_types, 'Per Participant Messages',
                          output_folder + output_file)
    hlp.dumpvariable(pid_dict, 'pid_dict.dict', output_folder)
    hlp.dumpvariable(ff.getdata(), 'messageData.list', output_folder)
def main(): parser = argparse.ArgumentParser('Script to perform sentiment analysis using VADER') parser.add_argument('-m', '-M', type=str, required=True, help='Location of the message file') parser.add_argument('-mt', '-MT', type=str, required=True, nargs='+', help='types of messages to filter') parser.add_argument('-f', '-F', type=str, required=True, help='filename where data is stored, no extension needed') parser.add_argument('-s', '-S', type=str, required=True, help='location of folder to store the file, ends with a /') parser.add_argument('-p', '-P', action='store_true', help='flag to store polarities separately') parser.add_argument('-w', '-W', type=str, required=False, help='conduct weekly analysis, path to the survey data for ' 'creating week information') parser.add_argument('-l', '-L', type=str, nargs='+', required=True, help='the filters to use, make one or more choices: seenB, wasB, didB') parser.add_argument('-lf', '-LF', type=str, nargs='+', required=True, help='location of filtered data, from runSurveyStats.py, in same order as -l/L flag') args = parser.parse_args() message_file = args.m message_types = args.mt filename_to_store = args.f location_to_store = args.s separate_polarity_score = args.p survey_file = args.w filters_chosen = args.l filter_files = args.lf catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True) if separate_polarity_score and survey_file is not None: print 'Cannot have separate polarity scores and weekly analysis together, ' \ 'please remove the -p/-P flag' return if survey_file is not None: wi = weeklyinfo() week_dates = wi.getweeklyfo(survey_file) gh = ghelper() ff = filterfields(message_file) data = [] for message_type in message_types: data.extend(ff.filterbyequality(pr.m_type, message_type)) pid_dict = hlp.getuniqueparticipants(data, 'all' if len(message_types) > 1 else message_types[0]) sentiment_analyzer = vadersenti(data[1:]) returned_data = sentiment_analyzer.compilesentiment(pr.m_content, 
separate_sentiment_list=separate_polarity_score) if separate_polarity_score: hlp.dumpvariable(returned_data, filename_to_store + '.data', location_to_store) else: header = pr.message_header + ['pos', 'neg', 'neu', 'compound'] final_data = [header] + returned_data hlp.writecsv(final_data, location_to_store + filename_to_store + '.csv') weekly_data = gh.filterweeklydata(pid_dict, returned_data, week_dates, 'all' if len(message_types) > 1 else message_types[0]) hlp.dumpvariable(weekly_data, 'weekly_data.dict', location_to_store) summarized_sentiment = {} for pid in weekly_data: summarized_sentiment[pid] = {} participant_data = weekly_data[pid] for week_no in participant_data: summarized_sentiment[pid][week_no] = sentiment_analyzer.summarizesentiment(participant_data[week_no], separate_in_out=True, message_type=message_type) hlp.dumpvariable(summarized_sentiment, 'weekly_summarized_sentiment.dict', location_to_store) plt = plots() overlay_data = gh.createbullyingoverlay(catch_all_data, week_dates, ff) plt.plotweeklyprogression(summarized_sentiment, location_to_store, 'Sentiment Progress', 'Week', 'Sentiment Value', sentiment_legend=['Positive', 'Negative', 'Neutral'], overlay_data=overlay_data) print 'done'