def open_file(data, filename, session_id):
    """Decode an uploaded base64 JSON payload into serialized dataframes.

    :param data: upload contents as a ``"content_type,base64"`` string,
        or None when nothing has been uploaded yet
    :param filename: name of the uploaded file (used to check the extension)
    :param session_id: session identifier (accepted but not used here)
    :return: ``[usage_json, messages_json]`` (both ``orient='split'`` JSON
        strings) on success, ``[None, None]`` when no data was supplied, or
        an ``html.H1`` error element when the file is not JSON
    """
    print("started open file " + str(datetime.datetime.now()))
    if data is None:
        # Nothing uploaded yet (e.g. an initial callback fire).
        return [None, None]

    content_type, content_string = data.split(',')
    if '.json' not in filename:
        return html.H1(children='File not a json')

    ret_val = base64.b64decode(content_string)
    data = json.load(io.BytesIO(ret_val))

    # One dataframe per match conversation, stacked into a single frame.
    list_of_dfs = [msg_fx.get_msg_df(msg_dict)
                   for msg_dict in data["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)
    # Flatten timestamps to calendar dates for per-day grouping downstream.
    all_msg_df['date'] = all_msg_df['sent_date'].dt.date

    usage_df = pd.DataFrame(data["Usage"])
    usage_df.index = pd.to_datetime(usage_df.index)
    usage_df['total_swipes'] = (usage_df['swipes_likes']
                                + usage_df['swipes_passes'])

    return [usage_df.to_json(date_format='iso', orient='split'),
            all_msg_df.to_json(date_format='iso', orient='split')]
def parse_json(data_path, output_path="output_graphs.pdf"):
    """
    Parses JSON, creates pdf of several plots

    :param data_path: string for location of the json file
    :param output_path: (optional) string for the name and location for the
        pdf that was created
    :return: dict of usage/message/user metrics on success, 1 if the input
        file was not found
    """
    print(data_path)
    if not os.path.isfile(data_path):
        print("File not found at ", data_path)
        return 1

    # Open JSON file
    with open(data_path, 'rb') as inp:
        data = json.load(inp)

    # Parse Json and put into dataframe with levels of MatchId and message number
    list_of_dfs = [msg_fx.get_msg_df(msg_dict)
                   for msg_dict in data["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)

    # Get plots related to messages
    msg_plots = msg_fx.get_msg_related_plots(all_msg_df)
    msg_metrics = msg_fx.get_message_metrics(all_msg_df)

    # Gather data for usage plots
    usage_df = pd.DataFrame(data["Usage"])
    usage_plots = usage.create_usage_plots(usage_df)
    usage_metrics = usage.gather_usage_stats(usage_df)

    # Gather user info to keep
    user_df = user.get_userdf_parts(data["User"])

    # Combine metrics to be stored
    all_metrics = {}
    all_metrics["usage"] = usage_metrics
    all_metrics["message"] = msg_metrics
    all_metrics["user"] = user_df

    # Convert any pandas objects nested inside the metric dicts to plain
    # dicts so the result is serializable.
    for metric_type in all_metrics:
        if isinstance(all_metrics[metric_type], dict):
            for key in all_metrics[metric_type]:
                value = all_metrics[metric_type][key]
                # BUG FIX: original re-tested type(all_metrics[metric_type])
                # here (always a dict in this branch), so nested DataFrames
                # were never converted — inspect the value itself instead.
                if isinstance(value, (pd.DataFrame, pd.Series)):
                    all_metrics[metric_type][key] = value.to_dict()

    # Export plots to pdf
    pp = PdfPages(output_path)
    for tmp_plt in msg_plots:
        pp.savefig(tmp_plt)
    for tmp_plt in usage_plots:
        pp.savefig(tmp_plt)
    pp.close()

    print("Completed parse json!")
    return all_metrics
def calculate_all_msg_dataframe(session_id, data):
    """Build one dataframe of all messages and serialize it to JSON.

    :param session_id: session identifier (accepted but not used here)
    :param data: parsed export dict; must contain a "Messages" list
    :return: JSON string of the combined message dataframe (index reset)
    """
    print('starting all msg 2')
    list_of_dfs = [msg_fx.get_msg_df(msg_dict)
                   for msg_dict in data["Messages"]]
    print("Working on msg df " + str(datetime.datetime.now()))
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)
    # Flatten timestamps to calendar dates for per-day grouping downstream.
    all_msg_df['date'] = all_msg_df['sent_date'].dt.date
    # BUG FIX: original printed 'error in json' unconditionally on the
    # success path — a misleading log line; report completion instead.
    print("Finished msg df " + str(datetime.datetime.now()))
    return all_msg_df.reset_index().to_json()
def parse_upload(upload_file, filename):
    """Decode an uploaded data export (.json or .zip) into JSON strings.

    :param upload_file: list whose first element is a
        ``"content_type,base64"`` upload string, or None when nothing
        has been uploaded
    :param filename: list whose first element is the uploaded file's name
    :return: ``[usage_json, messages_json, full_data_json]`` on success,
        ``[None, None, None]`` when nothing was uploaded or the file type
        is not recognized
    """
    print('Parse upload function started')
    if upload_file is None:
        print('Nothing uploaded, Time: ', str(datetime.datetime.now()))
        return [None, None, None]

    print('Found uploaded file ')
    content_type, content_string = upload_file[0].split(',')
    decoded = base64.b64decode(content_string)
    print('Filename detected as:', filename[0])

    if '.json' in filename[0][-5:]:
        data = json.load(io.BytesIO(decoded))
    elif '.zip' in filename[0][-4:]:
        # The export zip is expected to contain a single data.json at its root.
        zf = zipfile.ZipFile(io.BytesIO(decoded))
        file_str = zf.read('data.json')
        data = json.loads(file_str)
    else:
        print("File type not recognized")
        return [None, None, None]

    # One dataframe per match conversation, stacked into a single frame.
    list_of_dfs = [msg_fx.get_msg_df(msg_dict)
                   for msg_dict in data["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)

    msg_df_string = all_msg_df.reset_index().to_json(date_format='iso',
                                                     orient='split')
    # Usage is passed through as raw JSON; the dataframe built from it in an
    # earlier revision was never used and has been removed.
    usage_df_string = json.dumps(data['Usage'])
    all_data_str = json.dumps(data)
    print('parse fx complete')

    return [usage_df_string, msg_df_string, all_data_str]
def parse_json(data_path, pdf_name="output_graphs.pdf"):
    """
    Parses JSON, creates pdf of several plots

    :param data_path: string for location of the json file
    :param pdf_name: (optional) string for the name and location for the pdf
        that was created
    :return: 0 on success
    """
    print(type(data_path))

    # Open JSON file
    with open(data_path, "rb") as inp:
        data = json.load(inp)

    # Parse Json and put into dataframe with levels of MatchId and message number
    # NOTE(review): an earlier revision also built a throwaway frame from the
    # hard-coded data['Messages'][10] — unused and an IndexError risk on
    # small exports, so it has been removed.
    list_of_dfs = [mt_eda.get_msg_df(msg_dict)
                   for msg_dict in data["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0)

    # Data preparation for plots
    all_msg_df['flatten_date'] = all_msg_df['sent_date'].apply(
        mt_eda.flatten_date)
    dt_gb = all_msg_df.groupby('flatten_date')
    flag_col = [
        'explicit_word_in_msg', 'funny_word_in_msg', 'question_mark_in_msg',
        'question_word_in_msg'
    ]
    n_msg_over_time = dt_gb.apply(len)

    # Create plots of message over time with flags
    plts = [mt_eda.plot_number_of_msgs_ovr_time(n_msg_over_time)]
    for demo_flg in flag_col:
        plts.append(
            mt_eda.plot_flag_fx(n_msg_over_time, dt_gb[demo_flg].sum(),
                                demo_flg))

    # Export plots to pdf ('fig', not 'plt', to avoid shadowing the usual
    # matplotlib.pyplot alias)
    pp = PdfPages(pdf_name)
    for fig in plts:
        pp.savefig(fig)
    pp.close()

    print("Complete!")
    return 0
def get_messages_df(self):
    """Stack one dataframe per match conversation from ``self.data`` into
    a single frame stored on ``self.all_msg_df``."""
    frames = []
    for match in self.data["Messages"]:
        frames.append(msg_fx.get_msg_df(match))
    self.all_msg_df = pd.concat(frames, axis=0, sort=True)