def main():
    """Clean the raw dataset for the year/country given on the command line.

    The year and country are parsed from ``sys.argv``. The file referenced
    by ``CleaningConfig(year, country).raw_data`` is loaded with pandas,
    handed to ``CleaningData`` for cleaning, and then both the dataframe
    and the config file are written out by the cleaning object.
    """
    year, country = get_arguments(sys.argv[1:])

    # Load the dataset from the location the cleaning config points at.
    raw_frame = pd.read_csv(CleaningConfig(year, country).raw_data)

    cleaner = CleaningData(year, country, raw_frame)
    # The value returned by cleaning() is not needed here: the outputs are
    # written through the cleaner's own write methods.
    cleaner.cleaning()
    cleaner.write_df()
    cleaner.write_config_file()
def main():
    """Build, run and save the analysis notebook for one year/country.

    The year and country come from the command line. The notebook layout
    is read from the JSON file named by
    ``NotebookConfig.json_to_plot_location`` and walked as
    section -> group -> question. For every question a title cell is
    added, followed by cells that depend on its ``answer_format``:

    * ``'freetext'``    -> word cloud only
    * ``'datetime'``    -> nothing besides the title
    * ``'freenumeric'`` -> count and plot
    * anything else     -> count, percentage, display-all and plot
    """
    pd.set_option('display.max_rows', 300)

    # Which year and country to analyse is given on the command line.
    year, country = get_arguments(sys.argv[1:])
    notebook_config = NotebookConfig(year, country)
    # Folder used for the dataframes; passed through to add_count().
    folder_df = CountingConfig(year, country).folder_df

    notebook = GenerateNotebook(year, country,
                                notebook_config.notebook_filename)
    notebook.output_total_participants()

    layout = get_json_config_section(notebook_config.json_to_plot_location)
    for section_name, groups in layout.items():
        notebook.add_section(section_name)
        for group_name, questions in groups.items():
            notebook.add_group(group_name)
            for entry in questions:
                # Read every field first so a malformed entry fails before
                # anything is written for it.
                survey_columns = entry['survey_q']
                question_texts = entry['original_question']
                fmt = entry['answer_format']
                answer_file = entry['file_answer']
                answer_order = entry['order_question']

                # Join the question texts into a single title instead of
                # writing each of them on its own line.
                notebook.add_question_title('; '.join(question_texts))

                if fmt == 'freetext':
                    notebook.add_wordcloud(survey_columns)
                elif fmt != 'datetime':
                    notebook.add_count(survey_columns, fmt, answer_file,
                                       answer_order, folder_df)
                    if fmt != 'freenumeric':
                        # Free numeric answers get no percentage table.
                        notebook.add_percentage()
                        notebook.add_display_all()
                    notebook.add_plot(fmt)

    print('Running notebook')
    notebook.run_notebook()
    print('Saving notebook')
    notebook.save_notebook()
def main():
    """Create the survey for the year/country given on the command line."""
    # Which year and country to work on is given on the command line.
    year, country = get_arguments(sys.argv[1:])
    # Note the argument order: surveyCreation takes the country first.
    surveyCreation(country, year).run()
def main():
    """Build, run and save the analysis notebook for one year/country.

    The year and country come from the command line. The notebook layout
    is read from the JSON file named by
    ``NotebookConfig.json_to_plot_location`` and walked as
    section -> group -> question. For every question a title cell is
    added, followed by cells that depend on its ``answer_format``:

    * ``'freetext'``    -> word cloud only
    * ``'datetime'``    -> nothing besides the title
    * ``'freenumeric'`` -> count and plot
    * anything else     -> count, then either percentage + display-all
      (when ``notebook_config.show_percent is True`` and the format is not
      ``'likert'``) or display-count, then plot
    """
    pd.set_option('display.max_rows', 300)

    # Which year and country to analyse is given on the command line.
    year, country = get_arguments(sys.argv[1:])
    notebook_config = NotebookConfig(year, country)
    # Folder used for the dataframes; passed through to add_count().
    folder_df = CountingConfig(year, country).folder_df

    notebook = GenerateNotebook(year, country,
                                notebook_config.notebook_filename)
    notebook.output_total_participants()

    layout = get_json_config_section(notebook_config.json_to_plot_location)
    for section_name, groups in layout.items():
        notebook.add_section(section_name)
        for group_name, questions in groups.items():
            notebook.add_group(group_name)
            for entry in questions:
                # Read every field first so a malformed entry fails before
                # anything is written for it.
                survey_columns = entry['survey_q']
                question_texts = entry['original_question']
                fmt = entry['answer_format']
                answer_file = entry['file_answer']
                answer_order = entry['order_question']

                # Join the question texts into a single title instead of
                # writing each of them on its own line.
                notebook.add_question_title('; '.join(question_texts))

                if fmt == 'freetext':
                    notebook.add_wordcloud(survey_columns)
                elif fmt != 'datetime':
                    notebook.add_count(survey_columns, fmt, answer_file,
                                       answer_order, folder_df)
                    if fmt != 'freenumeric':
                        # Likert is excluded explicitly: a single likert
                        # item is drawn with the bar chart, which would
                        # plot percentages even though they make no sense
                        # for a likert scale.
                        if (notebook_config.show_percent is True
                                and fmt != 'likert'):
                            notebook.add_percentage()
                            notebook.add_display_all()
                        else:
                            notebook.add_display_count()
                    notebook.add_plot(fmt)

    print('Running notebook')
    notebook.run_notebook()
    print('Saving notebook')
    notebook.save_notebook()