コード例 #1
0
def open_file(data, filename, session_id):
    """
    Decodes an uploaded json export into serialised usage and message dataframes

    :param data: upload payload of the form "<content type>,<base64 content>",
        or None when nothing was uploaded
    :param filename: name of the uploaded file; it has to contain ".json"
    :param session_id: not used by this function
    :return: [usage_json, messages_json] (orient='split' json strings with iso
        dates), [None, None] when data is None, or an html.H1 element when the
        file name does not contain ".json"
    """
    print("started open file " + str(datetime.datetime.now()))
    if data is None:
        return [None, None]

    # The content-type prefix is not needed, only the base64 payload after the comma.
    _, content_string = data.split(',')
    if '.json' not in filename:
        # NOTE: this branch returns a component rather than a two-item list;
        # kept as-is because callers may depend on it.
        return html.H1(children='File not a json')
    export = json.loads(base64.b64decode(content_string))

    # One dataframe per entry of "Messages", stacked row-wise.
    list_of_dfs = [msg_fx.get_msg_df(msg_dict) for msg_dict in export["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)
    all_msg_df['date'] = all_msg_df['sent_date'].dt.date

    usage_df = pd.DataFrame(export["Usage"])
    usage_df.index = pd.to_datetime(usage_df.index)
    usage_df['total_swipes'] = usage_df['swipes_likes'] + usage_df['swipes_passes']

    return [
        usage_df.to_json(date_format='iso', orient='split'),
        all_msg_df.to_json(date_format='iso', orient='split'),
    ]
コード例 #2
0
def parse_json(data_path, output_path="output_graphs.pdf"):
    """
    Parses JSON, creates pdf of several plots and returns the gathered metrics

    :param data_path: string for location of the json file
    :param output_path: (optional) string for the name and location of the pdf to create
    :return: 1 when no file exists at data_path; otherwise a dict with the keys
        "usage", "message" and "user"
    """
    print(data_path)
    if not os.path.isfile(data_path):
        print("File not found at ", data_path)
        return 1
    # Open JSON file
    with open(data_path, 'rb') as inp:
        data = json.load(inp)

    # Parse Json and put into dataframe with levels of MatchId and message number
    list_of_dfs = [
        msg_fx.get_msg_df(msg_dict) for msg_dict in data["Messages"]
    ]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)

    # Get plots related to messages
    msg_plots = msg_fx.get_msg_related_plots(all_msg_df)
    msg_metrics = msg_fx.get_message_metrics(all_msg_df)

    # Gather data for usage plots
    usage_df = pd.DataFrame(data["Usage"])
    usage_plots = usage.create_usage_plots(usage_df)
    usage_metrics = usage.gather_usage_stats(usage_df)

    # Gather user info to keep
    user_df = user.get_userdf_parts(data["User"])

    # Combine metrics to be stored
    all_metrics = {
        "usage": usage_metrics,
        "message": msg_metrics,
        "user": user_df,
    }

    # Turn pandas objects nested one level deep into plain dicts. The check is
    # made on each nested value; the enclosing group is already known to be a
    # dict at this point. Only existing keys are reassigned, so iterating over
    # items() while writing is safe.
    for group in all_metrics.values():
        if not isinstance(group, dict):
            continue
        for key, value in group.items():
            if isinstance(value, (pd.DataFrame, pd.Series)):
                group[key] = value.to_dict()

    # Export plots to pdf; the context manager closes the file even when
    # saving one of the figures fails.
    with PdfPages(output_path) as pp:
        for tmp_plt in msg_plots:
            pp.savefig(tmp_plt)

        for tmp_plt in usage_plots:
            pp.savefig(tmp_plt)

    print("Completed parse json!")

    return all_metrics
コード例 #3
0
    def calculate_all_msg_dataframe(session_id, data):
        """
        Builds one dataframe out of every entry of data["Messages"] and serialises it

        :param session_id: not used by this function
        :param data: mapping with a "Messages" entry that can be iterated
        :return: json string of the combined dataframe after its index was reset
        """
        print('starting all msg 2')

        per_entry_frames = []
        for entry in data["Messages"]:
            per_entry_frames.append(msg_fx.get_msg_df(entry))

        timestamp = str(datetime.datetime.now())
        print("Working on msg df " + timestamp)

        combined = pd.concat(per_entry_frames, axis=0, sort=True)
        # Keep only the calendar day of each sent timestamp in a separate column.
        combined['date'] = combined['sent_date'].dt.date

        # NOTE(review): this message is printed on every call, not only on failure.
        print('error in json')
        flattened = combined.reset_index()
        return flattened.to_json()
コード例 #4
0
def parse_upload(upload_file, filename):
    """
    Decodes an uploaded export and serialises its parts to json strings

    :param upload_file: sequence whose first item is the upload payload of the
        form "<content type>,<base64 content>"; None or empty when nothing was
        uploaded
    :param filename: sequence whose first item is the name of the uploaded
        file; a name ending in ".json" is parsed directly, a name ending in
        ".zip" is opened as an archive and its "data.json" member is parsed
    :return: [usage_json, messages_json, all_data_json], or [None, None, None]
        when nothing was uploaded or the file type is not recognised
    """
    print('Parse upload function started')
    # An empty upload list is treated like a missing upload instead of
    # failing on upload_file[0].
    if not upload_file:
        print('Nothing uploaded, Time: ', str(datetime.datetime.now()))
        return [None, None, None]

    print('Found uploaded file ')
    # The content-type prefix is not needed, only the base64 payload.
    _, content_string = upload_file[0].split(',')
    decoded = base64.b64decode(content_string)

    uploaded_name = filename[0]
    print('Filename detected as:', uploaded_name)
    if uploaded_name.endswith('.json'):
        data = json.loads(decoded)
    elif uploaded_name.endswith('.zip'):
        # Close the archive as soon as the member has been read.
        with zipfile.ZipFile(io.BytesIO(decoded)) as zf:
            data = json.loads(zf.read('data.json'))
    else:
        print("File type not recognized")
        return [None, None, None]

    list_of_dfs = [msg_fx.get_msg_df(msg_dict) for msg_dict in data["Messages"]]
    all_msg_df = pd.concat(list_of_dfs, axis=0, sort=True)
    msg_df_string = all_msg_df.reset_index().to_json(date_format='iso', orient='split')

    # The usage section is forwarded as raw json; no dataframe is needed here.
    usage_df_string = json.dumps(data['Usage'])
    print('parse fx complete')

    all_data_str = json.dumps(data)

    return [usage_df_string, msg_df_string, all_data_str]
コード例 #5
0
def parse_json(data_path, pdf_name="output_graphs.pdf"):
    """
    Parses JSON, creates pdf of several plots

    :param data_path: string for location of the json file
    :param pdf_name: (optional) string for the name and location for the pdf that is created
    :return: 0 once the pdf has been written
    """

    print(type(data_path))
    # Open JSON file
    with open(data_path, "rb") as inp:
        data = json.load(inp)

    # Parse Json and put into dataframe with levels of MatchId and message number
    list_of_dfs = [
        mt_eda.get_msg_df(msg_dict) for msg_dict in data["Messages"]
    ]
    all_msg_df = pd.concat(list_of_dfs, axis=0)

    # Data preparation for plots
    all_msg_df['flatten_date'] = all_msg_df['sent_date'].apply(
        mt_eda.flatten_date)
    dt_gb = all_msg_df.groupby('flatten_date')
    flag_col = [
        'explicit_word_in_msg', 'funny_word_in_msg', 'question_mark_in_msg',
        'question_word_in_msg'
    ]
    n_msg_over_time = dt_gb.apply(len)

    # Create plots of message over time with flags
    figures = [mt_eda.plot_number_of_msgs_ovr_time(n_msg_over_time)]
    for demo_flg in flag_col:
        figures.append(
            mt_eda.plot_flag_fx(n_msg_over_time, dt_gb[demo_flg].sum(),
                                demo_flg))

    # Export plots to pdf; the context manager closes the file even when
    # saving one of the figures fails.
    with PdfPages(pdf_name) as pp:
        for figure in figures:
            pp.savefig(figure)

    print("Complete!")
    return 0
コード例 #6
0
 def get_messages_df(self):
     """
     Builds one dataframe out of every entry of self.data["Messages"]

     The combined dataframe is stored on self.all_msg_df; nothing is returned.
     """
     per_entry_frames = []
     for entry in self.data["Messages"]:
         per_entry_frames.append(msg_fx.get_msg_df(entry))
     # Stack the frames row-wise.
     combined = pd.concat(per_entry_frames, axis=0, sort=True)
     self.all_msg_df = combined