Ejemplo n.º 1
0
def custom_parse(x):
    """Parse a ``...Z``-suffixed timestamp string; pass non-strings through.

    Strings are first tried against the whole-second layout
    ``%Y-%m-%dT%H:%M:%SZ``. If that layout is rejected, the
    fractional-second layout ``%Y-%m-%dT%H:%M:%S.%fZ`` is tried instead.
    Both attempts call ``datetime_from_format`` with ``in_utc=True``.

    Args:
        x: Value to parse. Anything that is not a ``str`` is returned as is.

    Returns:
        ``x`` itself when it is not a string, otherwise the result of
        ``datetime_from_format``.

    Raises:
        Whatever ``datetime_from_format`` raises when the second layout is
        rejected as well.
    """
    if not isinstance(x, str):
        return x
    try:
        return datetime_from_format(x, "%Y-%m-%dT%H:%M:%SZ", in_utc=True)
    except ValueError:
        # Only a format mismatch should trigger the fallback. A bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        # NOTE(review): assumes datetime_from_format reports a mismatch as
        # ValueError (as strptime does) -- confirm against the helper.
        return datetime_from_format(x, "%Y-%m-%dT%H:%M:%S.%fZ", in_utc=True)
Ejemplo n.º 2
0
 def load(cls, nrows=None):
     """Load the first Samsung Health sleep-stage CSV export.

     The first path matched by the glob is read through
     ``cls.load_data_file_modified_time`` (skipping one leading row), and
     the ``start_time`` / ``end_time`` columns are converted with
     ``datetime_from_format``.

     Args:
         nrows: Forwarded to ``cls.load_data_file_modified_time``.

     Returns:
         ``cls`` constructed from the loaded data.

     Raises:
         IndexError: If the glob matches no file.
     """
     pattern = "~/nostalgia_data/input/samsung/samsunghealth_*/com.samsung.health.sleep_stage.*.csv"
     first_match = just.glob(pattern)[0]
     data = cls.load_data_file_modified_time(first_match, nrows=nrows, skiprows=1)
     # Both timestamp columns share the same textual layout.
     for column in ("start_time", "end_time"):
         parsed = []
         for stamp in data[column]:
             parsed.append(datetime_from_format(stamp, "%Y-%m-%d %H:%M:%S.%f"))
         data[column] = parsed
     return cls(data)
Ejemplo n.º 3
0
 def load(cls, nrows=None):
     """Build the collection from Spotify ``StreamingHistory*.json`` files.

     Every record read from the matched files yields one row holding the
     start time (end time minus ``msPlayed`` milliseconds), the end time,
     the track name, the artist name and the play time in seconds.

     Args:
         nrows: Not used by this loader.

     Returns:
         ``cls`` constructed from a DataFrame with the columns
         ``time_start``, ``time_end``, ``title``, ``artist`` and ``seconds``.
     """
     files = "~/nostalgia_data/input/spotify/StreamingHistory*.json"
     rows = []
     for x in flatten(just.multi_read(files).values()):
         # Parse the end timestamp once and derive the start from it,
         # instead of parsing the same string twice per record.
         time_end = datetime_from_format(x["endTime"], "%Y-%m-%d %H:%M")
         rows.append(
             (
                 time_end - timedelta(milliseconds=x["msPlayed"]),
                 time_end,
                 x["trackName"],
                 x["artistName"],
                 x["msPlayed"] / 1000,
             )
         )
     spotify = pd.DataFrame(
         rows,
         columns=["time_start", "time_end", "title", "artist", "seconds"],
     )
     return cls(spotify)
Ejemplo n.º 4
0
    def load(cls, nrows=None):
        """Load Fitbit sleep-level records as interval-indexed rows.

        For every sleep entry in the matched JSON files, the entries of
        ``levels.data`` are turned into rows whose interval runs from their
        own ``dateTime`` to the ``dateTime`` of the following entry; the
        sleep entry's ``endTime`` closes the last interval. The per-entry
        frames are concatenated and the raw ``dateTime`` column is dropped.

        Args:
            nrows: When given, processing of the current file stops after
                the first sleep entry whose frame has more than ``nrows``
                rows.
                NOTE(review): this only leaves the inner loop and compares a
                single frame, not the running total -- confirm the intent.

        Returns:
            ``cls`` constructed from the concatenated DataFrame.
        """
        file_glob = "~/nostalgia_data/input/fitbit/*/sleep/*.json"
        frames = []
        for sleep_entries in just.multi_read(file_glob).values():
            if not sleep_entries:
                continue
            for entry in sleep_entries:
                # Trailing placeholder row: it only supplies the end
                # timestamp of the final interval and is removed below.
                closing_row = {
                    'dateTime': entry['endTime'],
                    'level': None,
                    'seconds': None
                }
                stages = pd.DataFrame(entry["levels"]["data"] + [closing_row])
                stages["dateTime"] = [
                    datetime_from_format(stamp, "%Y-%m-%dT%H:%M:%S.%f")
                    for stamp in stages.dateTime
                ]
                # Pair each timestamp with its successor.
                lefts = stages.dateTime.iloc[:-1]
                rights = stages.dateTime.iloc[1:]
                intervals = pd.IntervalIndex.from_arrays(lefts, rights)
                stages = pd.DataFrame(stages.iloc[:-1]).set_index(intervals)
                stages["start"] = stages.index.left
                stages["end"] = stages.index.right
                frames.append(stages)
                if nrows is not None and stages.shape[0] > nrows:
                    break

        combined = pd.concat(frames).drop("dateTime", axis=1)

        return cls(combined)
Ejemplo n.º 5
0
    def load(cls, nrows=None):
        """Load the Apple iCloud usage CSV exports.

        All files matching the glob are read (skipping one leading row and
        skipping malformed lines), concatenated, truncated at the first row
        whose ``Date`` column equals the "Photos: Delete photo/video from
        iCloud Photo Library" marker, and the ``File Capture Date`` column is
        converted with ``datetime_from_format``.

        Args:
            nrows: Not used by this loader.

        Returns:
            ``cls`` constructed from the resulting DataFrame.

        Raises:
            IndexError: If no row carries the marker text in ``Date``.
        """
        import inspect

        files = "~/nostalgia_data/input/apple/*/iCloudUsageData Set*.csv"

        # ``error_bad_lines`` was deprecated in pandas 1.3 in favour of
        # ``on_bad_lines`` and removed in pandas 2.0, where passing it raises
        # TypeError. "warn" matches the old ``error_bad_lines=False``
        # behaviour (skip the line and emit a warning). Older pandas versions
        # without ``on_bad_lines`` keep using the original keyword.
        if "on_bad_lines" in inspect.signature(pd.read_csv).parameters:
            bad_line_options = {"on_bad_lines": "warn"}
        else:
            bad_line_options = {"error_bad_lines": False}

        icloud = pd.concat([pd.read_csv(f, skiprows=1, **bad_line_options) for f in just.glob(files)])
        # NOTE(review): the index *label* of the marker row is used as a
        # positional bound; with several concatenated files the labels repeat
        # -- confirm this is intended.
        icloud = icloud.iloc[
            : icloud.loc[icloud.Date == "Photos: Delete photo/video from iCloud Photo Library"].index.to_list()[0]
        ]
        icloud["File Capture Date"] = icloud["File Capture Date"].apply(lambda x: datetime_from_format(x, "%Y-%m-%d"))
        return cls(icloud)
Ejemplo n.º 6
0
    def load(cls, nrows=None, **kwargs):
        """Load WhatsApp chat exports into (time, sender, text) rows.

        Each line of every matched ``.txt`` file is expected to begin with a
        ``"%d/%m/%Y, %H:%M - "`` timestamp occupying the first ``offset``
        characters (``offset`` is defined outside this method). Lines whose
        prefix does not parse are buffered and prepended to the next line
        that does. The "Messages to this chat and calls are now secured"
        notice is skipped, and lines without a ``"sender: text"`` shape are
        printed with an ``ERR`` prefix and skipped.

        Because the export only has minute resolution, consecutive messages
        with an identical timestamp get a growing sub-minute offset
        (0s, 30s, 40s, 45s, ...) so that their original order is preserved.

        Args:
            nrows: Maximum number of messages to keep per file. A falsy
                value (``None`` or ``0``) means no limit.
            **kwargs: Accepted but not used.

        Returns:
            ``cls`` constructed from a DataFrame with the columns ``time``,
            ``sender`` and ``text``.
        """
        old_text = ""
        results = []
        nrows = nrows or float("inf")
        for file_path in just.glob("~/nostalgia_data/input/whatsapp/*.txt"):
            row = 0
            for line in just.iread(file_path):
                try:
                    time = datetime_from_format(line[:offset],
                                                "%d/%m/%Y, %H:%M - ")
                except ValueError:
                    # No timestamp prefix: buffer the line for the next
                    # timestamped message.
                    old_text += line + "\n"
                    continue
                line = old_text + line[offset:]
                old_text = ""
                try:
                    if line.startswith(
                            "Messages to this chat and calls are now secured"):
                        continue
                    sender, text = line.split(": ", 1)
                except ValueError:
                    print("ERR", line)
                    continue
                if line:
                    # ``>=`` so that at most ``nrows`` messages are kept;
                    # ``>`` let ``nrows + 1`` messages through.
                    if row >= nrows:
                        break
                    row += 1
                    results.append((time, sender, text))

        df = pd.DataFrame(results, columns=["time", "sender", "text"])
        # hack "order" into minute data
        same_minute = df.time == df.shift(1).time
        seconds = []
        second_prop = 0
        for x in same_minute:
            if x:
                second_prop += 1
            else:
                second_prop = 0
            # The k-th repeat of a timestamp is shifted by 60 * k / (k + 1)
            # seconds, which stays strictly below one minute.
            seconds.append(
                pd.Timedelta(seconds=60 * second_prop / (second_prop + 1)))
        df["time"] = df["time"] + pd.Series(seconds)
        return cls(df)
Ejemplo n.º 7
0
def find_date(x):
    """Extract a ``dd.mm.yy/HH.MM``-style timestamp embedded in ``x``.

    ``x`` is converted with ``str()`` and every backslash is doubled before
    the text is searched. Because the pattern starts with a greedy ``.*``,
    the last candidate in the text is the one that is captured.

    Args:
        x: Any value; its string form is searched.

    Returns:
        The result of ``datetime_from_format`` for the captured text, or
        ``None`` when the text contains no candidate.
    """
    # Raw string: ``\d``, ``\.`` and ``\/`` are invalid escapes in a normal
    # string literal and trigger a SyntaxWarning on Python 3.12+. The
    # compiled pattern is unchanged.
    date_regex = r".*(\d{2}[\.-]\d{2}[\.-]\d{2,4}[\/]\d{2}[\.-]\d{2}).*"
    value = str(x).replace("\\", "\\\\")
    m = re.match(date_regex, value)
    if m:
        # NOTE(review): the pattern also admits "-" separators and 3-4 digit
        # years, which the fixed format below does not describe -- confirm
        # whether such inputs occur.
        return datetime_from_format(m.group(1), "%d.%m.%y/%H.%M")
    return None
Ejemplo n.º 8
0
def convert_date(date):
    """Parse the string form of ``date`` using the ``%Y%m%d`` layout.

    Args:
        date: Value whose ``str()`` form is handed to
            ``datetime_from_format``.

    Returns:
        Whatever ``datetime_from_format`` returns for that text.
    """
    compact = str(date)
    return datetime_from_format(compact, "%Y%m%d")
Ejemplo n.º 9
0
 def handle_dataframe_per_file(cls, df, fname):
     """Turn the per-file ``time`` column into full timestamps.

     The day obtained from ``get_day(fname)`` is joined with each ``time``
     value by a single space and parsed as ``%Y %m %d %H:%M:%S``.

     Args:
         df: Frame read from one file; its ``time`` column is overwritten
             in place.
         fname: File name passed to ``get_day``.

     Returns:
         The same ``df`` with the converted ``time`` column, or ``None``
         when ``df`` is empty.
     """
     if not df.empty:
         day_prefix = get_day(fname) + " "
         stamps = []
         for clock in df.time:
             stamps.append(datetime_from_format(day_prefix + clock, "%Y %m %d %H:%M:%S"))
         df["time"] = stamps
         return df
     return None