def custom_parse(x):
    """Parse a ``...Z``-suffixed timestamp string, passing non-strings through.

    Strings are first tried against ``%Y-%m-%dT%H:%M:%SZ``; if that fails
    they are retried with fractional seconds (``%Y-%m-%dT%H:%M:%S.%fZ``).
    Both attempts call ``datetime_from_format`` with ``in_utc=True``.

    Args:
        x: Value to parse. Anything that is not a ``str`` is returned as is.

    Returns:
        ``x`` unchanged when it is not a string, otherwise whatever
        ``datetime_from_format`` returns.

    Raises:
        Whatever ``datetime_from_format`` raises when the fractional-seconds
        fallback fails as well.
    """
    if not isinstance(x, str):
        return x
    try:
        return datetime_from_format(x, "%Y-%m-%dT%H:%M:%SZ", in_utc=True)
    except ValueError:
        # Only a format mismatch should trigger the fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        # NOTE(review): assumes datetime_from_format signals a mismatch with
        # ValueError (as strptime does) -- confirm against its definition.
        return datetime_from_format(x, "%Y-%m-%dT%H:%M:%S.%fZ", in_utc=True)
def load(cls, nrows=None):
    """Build an instance from the first matching Samsung Health sleep-stage CSV.

    The first file returned by ``just.glob`` for the sleep-stage pattern is
    read through ``cls.load_data_file_modified_time`` (skipping one leading
    row), and its ``start_time`` / ``end_time`` columns are converted with
    ``datetime_from_format``.

    Args:
        nrows: Forwarded to ``cls.load_data_file_modified_time``.

    Returns:
        ``cls`` wrapping the loaded data.

    Raises:
        IndexError: If no file matches the glob pattern.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S.%f"
    pattern = "~/nostalgia_data/input/samsung/samsunghealth_*/com.samsung.health.sleep_stage.*.csv"
    first_match = just.glob(pattern)[0]
    frame = cls.load_data_file_modified_time(first_match, nrows=nrows, skiprows=1)
    # Both timestamp columns share the same textual layout.
    for column in ("start_time", "end_time"):
        converted = []
        for raw in frame[column]:
            converted.append(datetime_from_format(raw, stamp_format))
        frame[column] = converted
    return cls(frame)
def load(cls, nrows=None):
    """Build an instance from the Spotify ``StreamingHistory*.json`` exports.

    Every entry of every matching file becomes one row with the columns
    ``time_start``, ``time_end``, ``title``, ``artist`` and ``seconds``.
    ``time_end`` is the parsed ``endTime``; ``time_start`` is ``time_end``
    minus ``msPlayed`` milliseconds; ``seconds`` is ``msPlayed / 1000``.

    Args:
        nrows: Accepted for interface compatibility; not used by this loader.

    Returns:
        ``cls`` wrapping the resulting DataFrame.
    """
    files = "~/nostalgia_data/input/spotify/StreamingHistory*.json"
    rows = []
    for entry in flatten(just.multi_read(files).values()):
        # Parse endTime once and reuse it for both the start and end columns.
        time_end = datetime_from_format(entry["endTime"], "%Y-%m-%d %H:%M")
        played_ms = entry["msPlayed"]
        rows.append(
            (
                time_end - timedelta(milliseconds=played_ms),
                time_end,
                entry["trackName"],
                entry["artistName"],
                played_ms / 1000,
            )
        )
    spotify = pd.DataFrame(
        rows,
        columns=["time_start", "time_end", "title", "artist", "seconds"],
    )
    return cls(spotify)
def load(cls, nrows=None):
    """Load Fitbit sleep logs into a single interval-indexed frame.

    Every JSON file matching the Fitbit sleep glob is read. For each sleep
    record, the entries of ``record["levels"]["data"]`` become rows indexed
    by the interval from their own ``dateTime`` to the next entry's
    ``dateTime``; the record's ``endTime`` closes the last interval. The
    interval bounds are also exposed as ``start`` / ``end`` columns and the
    raw ``dateTime`` column is dropped from the result.

    Args:
        nrows: Optional soft row limit. Reading stops after the first record
            that brings the total number of collected rows to at least
            ``nrows``; the result is not truncated to exactly ``nrows``.

    Returns:
        ``cls`` wrapping the concatenated DataFrame.

    Raises:
        ValueError: From ``pd.concat`` when no sleep record was found.
    """
    file_glob = "~/nostalgia_data/input/fitbit/*/sleep/*.json"
    stamp_format = "%Y-%m-%dT%H:%M:%S.%f"
    # Flatten files -> records so one ``break`` ends the whole scan; empty or
    # missing file contents are skipped.
    records = (
        record
        for file_records in just.multi_read(file_glob).values()
        if file_records
        for record in file_records
    )
    objects = []
    rows_collected = 0
    for record in records:
        # Extra trailing row carrying endTime: it only supplies the right
        # edge of the last interval and is removed again below.
        closing_row = {"dateTime": record["endTime"], "level": None, "seconds": None}
        data = pd.DataFrame(record["levels"]["data"] + [closing_row])
        data["dateTime"] = [
            datetime_from_format(stamp, stamp_format) for stamp in data.dateTime
        ]
        start = data.dateTime.iloc[:-1]
        end = data.dateTime.iloc[1:]
        interval_index = pd.IntervalIndex.from_arrays(start, end)
        data = pd.DataFrame(data.iloc[:-1])
        data = data.set_index(interval_index)
        data["start"] = data.index.left
        data["end"] = data.index.right
        objects.append(data)
        # Compare the running total (not just this record's size) to nrows.
        rows_collected += data.shape[0]
        if nrows is not None and rows_collected >= nrows:
            break
    data = pd.concat(objects).drop("dateTime", axis=1)
    return cls(data)
def load(cls, nrows=None):
    """Build an instance from the Apple ``iCloudUsageData Set*.csv`` exports.

    All matching CSV files are read (skipping one leading row each) and
    concatenated. Rows from the first one whose ``Date`` column equals
    ``"Photos: Delete photo/video from iCloud Photo Library"`` onwards are
    discarded, and ``File Capture Date`` is converted with
    ``datetime_from_format`` using ``%Y-%m-%d``.

    Args:
        nrows: Accepted for interface compatibility; not used by this loader.

    Returns:
        ``cls`` wrapping the truncated DataFrame.

    Raises:
        IndexError: If no row carries the marker text in ``Date``.
        ValueError: From ``pd.concat`` when no file matches the glob.
    """
    files = "~/nostalgia_data/input/apple/*/iCloudUsageData Set*.csv"
    # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and
    # removed in 2.0 (replacement: ``on_bad_lines="skip"``). Kept as is so
    # the loader still runs on the pandas version this file was written for.
    icloud = pd.concat(
        [pd.read_csv(f, skiprows=1, error_bad_lines=False) for f in just.glob(files)]
    )
    is_marker = (
        icloud.Date == "Photos: Delete photo/video from iCloud Photo Library"
    ).values
    # Use the marker's *position*: after concatenating several files the
    # index labels repeat per file, so a label is not a valid iloc offset.
    first_marker = is_marker.nonzero()[0][0]
    icloud = icloud.iloc[:first_marker]
    icloud["File Capture Date"] = icloud["File Capture Date"].apply(
        lambda x: datetime_from_format(x, "%Y-%m-%d")
    )
    return cls(icloud)
def load(cls, nrows=None, **kwargs):
    """Build an instance from WhatsApp chat exports (``whatsapp/*.txt``).

    Each line whose first ``offset`` characters parse as
    ``"%d/%m/%Y, %H:%M - "`` is split at the first ``": "`` into sender and
    text. Lines starting with the "now secured" notice are skipped; lines
    without ``": "`` are reported with ``print("ERR", ...)`` and skipped.
    Because timestamps have minute resolution, rows sharing the previous
    row's minute get an increasing sub-minute offset so their order is kept.

    Args:
        nrows: Maximum number of rows taken from each file; ``None`` or ``0``
            means no limit.
        **kwargs: Accepted and ignored.

    Returns:
        ``cls`` wrapping a DataFrame with columns ``time``, ``sender``, ``text``.
    """
    # ``offset`` (length of the timestamp prefix) is defined outside this block.
    old_text = ""
    results = []
    nrows = nrows or float("inf")
    for file_path in just.glob("~/nostalgia_data/input/whatsapp/*.txt"):
        row = 0
        for line in just.iread(file_path):
            try:
                timestamp = datetime_from_format(line[:offset], "%d/%m/%Y, %H:%M - ")
            except ValueError:
                # No timestamp prefix: buffer the line; it is prepended to the
                # next line that does carry a timestamp.
                # NOTE(review): if these are continuation lines of a
                # multi-line message they end up in front of the *following*
                # message (and its sender) -- confirm this is intended.
                old_text += line + "\n"
                continue
            line = old_text + line[offset:]
            old_text = ""
            if line.startswith("Messages to this chat and calls are now secured"):
                continue
            try:
                sender, text = line.split(": ", 1)
            except ValueError:
                print("ERR", line)
                continue
            # ``>=`` so that at most ``nrows`` rows are kept per file
            # (``>`` let one extra row through).
            if row >= nrows:
                break
            row += 1
            results.append((timestamp, sender, text))
    df = pd.DataFrame(results, columns=["time", "sender", "text"])
    # hack "order" into minute data: the k-th consecutive row within the same
    # minute is shifted by 60 * k / (k + 1) seconds (30s, 40s, 45s, ...),
    # which grows with k but always stays below one minute.
    same_minute = df.time == df.shift(1).time
    seconds = []
    second_prop = 0
    for repeats_previous in same_minute:
        if repeats_previous:
            second_prop += 1
        else:
            second_prop = 0
        seconds.append(pd.Timedelta(seconds=60 * second_prop / (second_prop + 1)))
    df["time"] = df["time"] + pd.Series(seconds)
    return cls(df)
def find_date(x):
    """Extract a ``DD.MM.YY/HH.MM``-style timestamp embedded in ``x``.

    ``x`` is stringified and searched for two digits, a ``.`` or ``-``, two
    digits, a ``.`` or ``-``, a 2-4 digit year, ``/``, two digits, a ``.`` or
    ``-``, and two digits. Because of the greedy leading ``.*`` the last such
    occurrence in the string is the one used.

    Args:
        x: Any value; it is converted with ``str`` before matching.

    Returns:
        The result of ``datetime_from_format`` for the matched text, or
        ``None`` when ``x`` contains no such timestamp.

    Raises:
        Whatever ``datetime_from_format`` raises for text that matches the
        pattern but is not a valid date (e.g. a three-digit year).
    """
    # Raw string: "\d" and "\." are invalid escapes in a normal literal.
    date_regex = r".*(\d{2}[\.-]\d{2}[\.-]\d{2,4}[\/]\d{2}[\.-]\d{2}).*"
    value = str(x).replace("\\", "\\\\")
    m = re.match(date_regex, value)
    if not m:
        return None
    # The pattern accepts "-" separators and four-digit years, so normalise
    # the separators and pick the matching year directive instead of always
    # parsing with "%d.%m.%y/%H.%M".
    stamp = m.group(1).replace("-", ".")
    year = stamp.split("/")[0].split(".")[2]
    date_format = "%d.%m.%Y/%H.%M" if len(year) == 4 else "%d.%m.%y/%H.%M"
    return datetime_from_format(stamp, date_format)
def convert_date(date):
    """Parse ``date`` as a compact ``%Y%m%d`` value.

    The argument is stringified first, so non-string inputs are passed to
    ``datetime_from_format`` as their ``str`` form.
    """
    compact = str(date)
    return datetime_from_format(compact, "%Y%m%d")
def handle_dataframe_per_file(cls, df, fname):
    """Turn the per-row clock times of one file's frame into full timestamps.

    The day is obtained from ``get_day(fname)`` and joined with each value
    of ``df.time`` by a single space before being parsed with
    ``"%Y %m %d %H:%M:%S"``. The ``time`` column is replaced in place.

    Args:
        df: Frame loaded from ``fname``; must expose ``empty`` and ``time``.
        fname: File name the frame was loaded from.

    Returns:
        ``None`` for an empty frame, otherwise ``df`` itself.
    """
    if df.empty:
        return None
    day = get_day(fname)
    timestamps = []
    for clock in df.time:
        timestamps.append(
            datetime_from_format(" ".join((day, clock)), "%Y %m %d %H:%M:%S")
        )
    df["time"] = timestamps
    return df