Example #1
def test_csv_iread_problem_lines():
    fname = "testobj.csv"
    obj = ["a"] + [['"a"', '"b"']]
    just.write(obj, "testobj.csv")
    try:
        just.read(fname)
    except ValueError:
        assert True
    finally:
        os.remove(fname)
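
A contrasting round-trip sketch (assuming the just package is installed; just picks the serializer from the file extension, so uniform rows should read back cleanly):

import os
import just

rows = [["a", "b"], ["1", "2"]]
just.write(rows, "example.csv")      # serialized as CSV based on the extension
print(just.read("example.csv"))      # rows come back as lists of strings
os.remove("example.csv")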
Example #2
def read_array_of_dict_from_json(fname, key_name=None, nrows=None):
    """
    Iteratively read a JSON file without constructing Python objects for the
    entire document at once. This can be a lot faster for large files.

    Example data:
    {"participants": {"name": "a", "name": "b", "messages": [{"sender": "a", "time": 123}, {"sender": "b", "time": 124}]}}

    Function call:
    read_array_of_dict_from_json(fname, "messages", nrows=1)

    Returns:
    pd.DataFrame([{"sender": "a", "time": 123}])
    """
    if fname.endswith(".jsonl"):
        if not key_name:
            return pd.read_json(fname, lines=True)
        else:
            return pd.DataFrame([x[key_name] for x in just.read(fname)])

    if nrows is None:
        if not key_name:
            return pd.read_json(fname)  # the .jsonl case was already handled above
        else:
            return pd.DataFrame(just.read(fname)[key_name])

    import ijson  # lazy import: only needed for this streaming branch

    with open(just.make_path(fname)) as f:
        parser = ijson.parse(f)
        capture = False
        rows = []
        row = {}
        map_key = ""
        num = 0
        for prefix, event, value in parser:
            if num >= nrows:  # stop once nrows rows have been captured
                break
            if prefix == key_name and event == "start_array":
                capture = True
            if not capture:
                continue
            if event == "start_map":
                continue
            elif event == "map_key":
                map_key = value
            elif event == "end_map":
                rows.append(row)
                row = {}
                num += 1
            elif map_key:
                row[map_key] = value
    return pd.DataFrame(rows)
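
A hedged usage sketch for the streaming branch above (requires ijson; the file name and contents are made up to match the docstring):

import json

with open("sample.json", "w") as f:
    json.dump({"participants": [{"name": "a"}, {"name": "b"}],
               "messages": [{"sender": "a", "time": 123},
                            {"sender": "b", "time": 124}]}, f)

# passing nrows switches to incremental parsing instead of loading the whole file
df = read_array_of_dict_from_json("sample.json", "messages", nrows=1)
print(df)  # a single row: sender "a", time 123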
Example #3
def get_model(model_name=None, models_path="models/"):
    model = None
    if model_name:
        model_file = models_path + model_name + ".json"
        weights_file = models_path + model_name + ".h5"
        if os.path.isfile(model_file):
            # just.read parses the JSON into a dict, so it is re-serialized
            # into the string that model_from_json expects
            model = model_from_json(json.dumps(just.read(model_file)))
            # load weights into the restored model
            model.load_weights(weights_file)
            print("Loaded model from disk")
    if model is None:  # no name given, or no saved model on disk
        print("Cannot read model, creating fresh one")
        # Create the model (old Keras 1.x API: Convolution2D, border_mode)
        model = Sequential()
        model.add(BatchNormalization(input_shape=(3, 72, 128)))
        model.add(Convolution2D(32, 3, 3, border_mode='same', activation='relu'))
        model.add(Activation('relu'))  # no-op: the layer above already applies relu
        model.add(Dropout(0.15))
        model.add(BatchNormalization())
        model.add(Convolution2D(32, 3, 3, activation='relu', border_mode='same'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.5))
        # to prediction
        model.add(Dense(2))
        model.add(Activation('linear'))
    model.compile(loss='mean_squared_error', optimizer="adam")
    return model
Example #4
    def load(cls, nrows=None):
        photo_glob = "~/nostalgia_data/input/google/Takeout/Google Photos/*/*"
        pics = []
        nrows = nrows or float("inf")
        rows = 0
        for fname in just.glob(photo_glob):
            if fname.endswith(".json"):
                continue
            try:
                meta = just.read(fname + ".json")
            except FileNotFoundError:
                continue
            if rows == nrows:
                break
            date = datetime.fromtimestamp(
                int(meta["photoTakenTime"]["timestamp"]), tz)
            latitude, longitude = format_latlng(
                (meta["geoData"]["latitude"],
                 meta["geoData"]["longitude"])).split(", ")
            title = meta["title"]
            pics.append({
                "path": "file://" + fname,
                "lat": latitude,
                "lon": longitude,
                "title": title,
                "time": date,
            })
            rows += 1

        pics = pd.DataFrame(pics)
        return cls(pics)
Example #5
def file_modified_since_last(fname, name):
    path = just.make_path("~/nostalgia_data/seen/" + slugify(name) + ".json")
    last_run_mt = float(just.read(path, no_exist=0))
    modified_time = os.path.getmtime(fname)
    if last_run_mt != modified_time:
        return modified_time
    else:
        return None
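
A hypothetical counterpart (not in the source) that records the processed mtime in the same seen-file, so the next file_modified_since_last() call returns None until the file changes again:

def save_last_modified_time(fname, name):
    path = just.make_path("~/nostalgia_data/seen/" + slugify(name) + ".json")
    just.write(os.path.getmtime(fname), path)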
Example #6
def view(path):
    if path.endswith("gz"):
        from requests_viewer import view_html

        view_html(just.read(path))
    else:
        import webbrowser

        webbrowser.open("file://" + path)
Example #7
 def __init__(self, model_name, encoder_decoder=None, hidden_units=128, base_path="models/"):
     self.model_name = model_name
     self.h5_path = base_path + model_name + ".h5"
     self.pkl_path = base_path + model_name + ".pkl"
     self.model = None
     self.hidden_units = hidden_units
     if encoder_decoder is None:
         self.encoder_decoder = just.read(self.pkl_path)
     else:
         self.encoder_decoder = encoder_decoder
Example #8
 def load(cls, file_path="~/nostalgia_data/input/shazam.json", nrows=None):
     shazam = pd.DataFrame(
         [(
             datetime_from_timestamp(x["timestamp"], x["timezone"]),
             x["track"]["heading"]["title"],
             x["track"]["heading"]["subtitle"],
         ) for x in just.read(file_path)["tags"]],
         columns=["time", "title", "artist"],
     )
     return cls(shazam)
Example #9
def check_seen(name, value):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    is_new = True
    res = just.read(path, no_exist=None)  # None sentinel, so falsy stored values still count
    if res is not None:
        if isinstance(value, tuple):
            value = list(value)
        is_new = res != value
    if is_new:
        just.write(value, path)
    return is_new
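
Usage sketch with made-up values: check_seen returns True (and persists the value) only when it differs from what was stored on the previous run:

if check_seen("fitbit_profile", ["user123", "2020-01-01"]):
    print("value changed since the last run")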
Example #10
def get_title(x):
    if not x.get("domain"):
        return ""
    if x["path"] in CACHE:
        return CACHE[x["path"]]
    # tree = lxml.html.fromstring(just.read(x["path"]))
    # title = tree.xpath("/html/head/title/text()") or tree.xpath("//title/text()") or [""]
    # destroy_tree(tree)
    # title = title[0]
    match = re.search("<title>([^<]+)</title>", just.read(x["path"]),
                      re.MULTILINE)
    title = match.group(1).strip() if match is not None else ""
    CACHE[x["path"]] = title
    return title
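
A standalone check of the title regex used above (the trailing ">" may be omitted, as in the original, without changing the match):

import re

html = "<html><head><title>Hello</title></head></html>"
match = re.search("<title>([^<]+)</title>", html, re.MULTILINE)
print(match.group(1))  # Hello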
Example #11
 def __init__(self,
              model_name,
              encoder_decoder=None,
              hidden_units=128,
              base_path="models/"):
     self.model_name = model_name  # model name
     self.h5_path = base_path + model_name + ".h5"  # path to the model .h5 file
     self.pkl_path = base_path + model_name + ".pkl"  # path to the data .pkl file
     self.model = None  # the model
     self.hidden_units = hidden_units  # number of hidden units
     if encoder_decoder is None:  # check whether an encoder/decoder was supplied
         self.encoder_decoder = just.read(self.pkl_path)
     else:
         self.encoder_decoder = encoder_decoder
Example #12
def search():
    global matches
    word = "".join(letters).lower()
    matches = [x for x in emojis if word in x.lower()] or emojis[:len(imgs)]
    for i in range(len(imgs)):
        color = active_color if selected_index == i else None
        if i < len(matches):
            uni, txt, code = matches[i].split("| ")
            if code not in cache:
                cache[code] = just.read(f"~/emoji_picker_images/{code}.base64",
                                        unknown_type="txt")
            imgs[i].update(data=cache[code])
        else:
            txt = ""
            imgs[i].update(data="")
        inps[i].update(value=txt, text_color=color)
Example #13
def get_linked_data(x):
    path = x["path"]
    if path in CACHE:
        return CACHE[path]
    try:
        html = just.read(path)
    except EOFError:
        CACHE[path] = None
        return None
    if not html.strip():
        CACHE[path] = None
        return None
    art = parse_article(html, x["url"])
    linked_data = get_linked_data_md(art)
    if linked_data is None:
        linked_data = get_linked_data_jd(art)
    CACHE[path] = linked_data
    return linked_data
Example #14
 def ensure_access_token(self):
     if self.access_token:
         return
     now = int(time.time())
     if just.exists(self.ACCESS_TOKEN_FILE):
         access_token = just.read(self.ACCESS_TOKEN_FILE,
                                  unknown_type="json")
         if now > access_token['time'] + access_token['expires_in']:
             log.info('Cached access token is expired')
             os.unlink(self.ACCESS_TOKEN_FILE)
         else:
             self.access_token = access_token
             return
     self.access_token = self.get_access_token()
     self.access_token['time'] = now
     just.write(self.access_token,
                self.ACCESS_TOKEN_FILE,
                unknown_type="json")
Example #15
def get_linked_data(x):
    path = x["path"]
    if path in CACHE:
        return CACHE[path]
    try:
        html = just.read(path)
    except EOFError:
        CACHE[path] = None
        return None
    if not html.strip():
        CACHE[path] = None
        return None
    art = parse_article(html, x["url"])
    if "youtube" not in art.domain:
        return None
    title = re.sub(" - YouTube$", "", art.tree.xpath("//title/text()")[0])
    if title == "YouTube":
        CACHE[path] = None
        return None
    if not title:
        return None
    vc = art.tree.xpath("//span[contains(@class, 'view-count')]/text()")
    vc = re.sub("[^0-9]", "", vc[0]) if vc else None
    watch_part = urllib.parse.parse_qs(urllib.parse.urlparse(
        x["url"]).query).get("v")  # .get avoids a KeyError when "v" is absent
    if watch_part:
        image = "http://i3.ytimg.com/vi/{}/maxresdefault.jpg".format(
            watch_part[0])
    else:
        image = None
    channel = art.tree.xpath("//ytd-video-owner-renderer//a/text()")
    if not channel:
        channel = art.tree.xpath("//ytd-channel-name//a/text()")
    channel = " ".join(channel)
    linked_data = {
        "title": title,
        "type": "video",
        "source": "youtube",
        "image": image,
        "view_count": vc,
        "channel": channel,
    }
    CACHE[path] = linked_data
    return linked_data
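
How the stdlib pulls the watch id used for the thumbnail URL above:

import urllib.parse

qs = urllib.parse.parse_qs(
    urllib.parse.urlparse("https://www.youtube.com/watch?v=abc123").query)
print(qs.get("v"))  # ['abc123']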
Example #16
def mockle(name, good=None, exception=""):
    name = get_path(name)
    if get_env() == "dev":
        if not exception:
            print("storing mocked", name)
            just.write(good, name)
            return good
        else:
            try:
                print("load mocked", name, "ignoring exception:")
                print(exception)
                return just.read(name)
            except Exception:
                # do not raise "this" exception, but the original
                raise exception
    elif exception == "":
        return good
    else:
        raise exception
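
A hypothetical usage sketch (fetch_remote_data is invented for illustration): in a dev environment a failing call falls back to the last stored good value, if any:

def fetch_remote_data():  # hypothetical flaky call, for illustration only
    raise IOError("network down")

try:
    result = mockle("remote_data", good=fetch_remote_data())
except Exception as e:
    result = mockle("remote_data", exception=e)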
Example #17
 def __enter__(self):
     good = None
     exception = None  # initialize both so no branch below hits an unbound name
     try:
         good = self.fn(self.args)
     except Exception as e:
         exception = e
     if self.env == "dev" and exception:
         try:
             print("load mocked", self.name, "ignoring exception:")
             print(exception)
             return just.read(self.name)
         except Exception:
             # do not raise this exception, but the original
             raise exception
     elif self.env == "dev" and good is not None:
         print("storing mocked", self.name)
         just.write(good, self.name)
         return good
     elif exception:
         raise exception
Example #18
 def load_json_file_modified_time(cls,
                                  fname,
                                  nrows=None,
                                  from_cache=True,
                                  **kwargs):
     name = fname + "_" + normalize_name(cls.__name__)
     modified_time = os.path.getmtime(os.path.expanduser(fname))
     last_modified = get_last_mod_time(name)
     if modified_time != last_modified or not from_cache:
         data = just.read(fname)
         data = cls.handle_json(data, **kwargs)
         data = pd.DataFrame(data)
         if nrows is None:
             save_df(data, name)
             save_last_mod_time(modified_time, name)
     else:
         data = load_df(name)
     if nrows is not None:
         data = data.iloc[-nrows:]
     return data
Example #19
def get_linked_data(x):
    path = x["path"]
    if path in CACHE:
        return CACHE[path]
    try:
        html = just.read(path)
    except EOFError:
        CACHE[path] = None
        return None
    if not html.strip():
        CACHE[path] = None
        return None
    tree = lxml.html.fromstring(html)
    res = tree.xpath("//input[@name='q' and @type='text']")
    if not res:
        linked_data = None
    else:
        linked_data = {"title": res[0].value}
    CACHE[path] = linked_data
    return linked_data
Example #20
def main():
    conf_path = "~/nostalgia_data/config/fitbit/config.json"
    if not just.exists(conf_path):
        webbrowser.open("https://dev.fitbit.com/apps/new")
        webbrowser.open(
            "https://raw.githubusercontent.com/nostalgia-dev/nostalgia_fitbit/master/docs/fitbit_app.png"
        )
        client_id = getpass.getpass("Client ID: ")
        client_secret = getpass.getpass("Client Secret: ")
        info = {"client_id": client_id, "client_secret": client_secret}
        just.write(info, conf_path)
        print("Saved in:", conf_path)
    config = just.read(conf_path)
    if not config["client_id"] or not config["client_secret"]:
        msg = "Fill in a value for client_id and client_secret in '{}'".format(
            conf_path)
        raise ValueError(msg)

    fa = FitbitAuth(client_id=config['client_id'],
                    client_secret=config['client_secret'])
    fa.ensure_access_token()

    try:
        f = Fitbit(access_token=fa.access_token['access_token'])
        print(json.dumps(f.profile, indent=2))
    except requests.exceptions.HTTPError as e:
        print(e.response.status_code)
        if e.response.status_code == 429:
            print(e.response.headers)
            return
        raise

    export = FitbitExport(f, profile=f.profile)

    export.sync_sleep()
    export.sync_heartrate_intraday()
Example #21
import gzip
import os
import just
from auto_extract import parse_article
import tqdm
from urllib.parse import urlparse
import tldextract
from utils import KEYS_TO_KEEP

for x in tqdm.tqdm(just.glob("/home/pascal/.nostalgia/meta/v1/*.json")):
    print("processing", x)
    meta = just.read(x)
    if "extruct" in meta:
        print("skipping", x)
        continue
    # note: .rstrip(".json") would strip characters, not the suffix
    html_path = ("/home/pascal/.nostalgia/html/" +
                 x.split("/")[-1][:-len(".json")] + ".html.gz")
    if os.path.exists(html_path):
        with gzip.GzipFile(html_path, "r") as f:
            html = f.read()
        article = parse_article(html, meta["url"])
        meta = article.to_dict(keys=KEYS_TO_KEEP, skip_if_empty=True)
        just.write(meta, x)
        os.system("touch '{}' -r '{}'".format(x, html_path))
        print("done", x)
Example #22
def get_processed_files(name):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return set(just.read(path, no_exist=[]))
Example #23
def get_newline_count(name):
    """ counts by row numbers in a file """
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return just.read(path, no_exist=0)
Example #24
def get_last_latest_file(name):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return just.read(path, no_exist=0)
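
A hypothetical writer for the "seen" bookkeeping that the three helpers above read back; each helper supplies a matching no_exist default for the first run:

def save_newline_count(count, name):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    just.write(count, path)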
Example #25
import just
usm = just.read("data/money/us.json")

usd_min = set([x[0] for x in usm['symbols']["$"]] +
              [x[0] for x in usm['currencies']["dollar"]] +
              [x[0] for x in usm['currencies']["bucks"]])

usm['keywords'] = {k: v for k, v in usm['keywords'].items() if v in usd_min}
del usm['keywords']["Tongan"]
usm['symbols'] = {"$": usm['symbols']['$']}
usm['currencies'] = {
    "dollar": usm['currencies']['dollar'],
    "bucks": usm['currencies']['bucks']
}
usm['abbrevs'] = [x for x in usm['abbrevs'] if x in usm['keywords'].values()]
usm['abbrevs'].remove("TOP")

just.write(usm, "data/money/us_min.json")

###
import just
usm = just.read("/Users/pascal/egoroot/natura/data/money/us.json")

eur_min = set([x[0] for x in usm['symbols']["$"]] +
              [x[0] for x in usm['currencies']["dollar"]] +
              [x[0] for x in usm['currencies']["bucks"]] +
              [x[0] for x in usm['symbols']["€"]] +
              [x[0] for x in usm['currencies']["euro"]])

usm['keywords'] = {k: v for k, v in usm['keywords'].items() if v in eur_min}
usm['symbols'] = {"$": usm['symbols']['$'], "€": usm['symbols']["€"]}
Example #26
def replace_ndf_class():
    ndf_replace = "class NDF:"
    original = just.read(NDF_PATH)
    ndf = [x for x in original.split("\n") if x.startswith("class NDF")][0]
    just.write(original.replace(ndf, ndf_replace), NDF_PATH)
    return original
Example #27
def test_unsuccessful_read():
    assert just.read("A" * 100, no_exist=42) == 42
Example #28
                cache[code] = just.read(f"~/emoji_picker_images/{code}.base64",
                                        unknown_type="txt")
            imgs[i].update(data=cache[code])
        else:
            txt = ""
            imgs[i].update(data="")
        inps[i].update(value=txt, text_color=color)


while True:
    event, values = window.read()
    print(event)
    do_search = True
    if emojis is None:
        print("loading emoji")
        emojis = just.read("~/emojis2.txt").split("\n")
    if event in (None, "Cancel"):  # if user closes window or clicks cancel
        break
    elif event.startswith("Ok"):
        do_search = False
    elif event.startswith("Control"):
        if last_key.startswith("BackSpace") or event == "\x7f":
            letters = []
            sofar.update(value="".join(letters))
            search()
    elif event.startswith("Shift"):
        do_search = False
    elif event.startswith("Alt"):
        do_search = False
    elif event.startswith("Super"):
        do_search = False
Example #29
 def read(self, index):
     return just.read(self.path[index])
Example #30

def write_error(fname):
    with open("/home/pascal/csvres.jsonl", "a") as f:
        f.write(json.dumps({"meta": {"fname": fname}, "error": True}) + "\n")


if __name__ == "__main__":
    tot = 0
    results = []
    done = set()
    with open("/home/pascal/csvres.jsonl") as f:
        for line in f:
            done.add(json.loads(line)["meta"]["fname"])

    lines = just.read("~/csvlist.txt").split("\n")
    for line in tqdm.tqdm(random.sample(lines, len(lines))):
        if line in done:
            continue
        try:
            mb = os.path.getsize(line) / 1000 / 1000
        except FileNotFoundError:
            done.add(line)
            write_error(line)
            continue
        # tot += mb
        # if mb > 1:
        #     print(mb, line)
        df, meta, X = get_meta(line)
        if X is not None:
            # print(mb, line)