Beispiel #1
0
def write_csv_file(root_key, filename, description):
    """Ensure a file exists under ``root_key`` and register it in DATA_STORE.

    The directory ``root_key`` is created with ``mkdir_p``, the file
    ``root_key/filename`` is opened in append mode (so it is created when
    missing and left untouched otherwise), and the path is registered in
    ``DATA_STORE`` under a key equal to the path minus its file extension.

    Args:
        root_key: Directory that holds the file; also the prefix of the key.
        filename: Name of the file inside ``root_key``.
        description: Description forwarded to ``DATA_STORE.add_file``.

    Returns:
        The path of the file, i.e. ``os.path.join(root_key, filename)``.
    """
    data_path = os.path.join(root_key, filename)
    mkdir_p(root_key)

    # Append mode creates the file if needed but never truncates content.
    with open(data_path, 'a+'):
        pass

    # splitext only looks at the final path component, so a dot inside a
    # directory name (e.g. "../data" or "v1.0/raw") cannot truncate the key
    # when the file name itself carries no extension.
    key = os.path.splitext(data_path)[0]

    DATA_STORE.add_file(key, data_path, description, force=True)

    return data_path
Beispiel #2
0
def register_to_datastore(data_path, root_key, description):
    """Register every non-zip file below ``data_path`` in DATA_STORE.

    ``root_key`` is (re)created first. Each file found while walking
    ``data_path`` is then added under ``root_key/<sub key>``, where the sub
    key is the file's path relative to ``data_path`` with '/' separators and
    the file extension removed. Files ending in ".zip" are skipped.

    Args:
        data_path: Directory to walk.
        root_key: Key under which all discovered files are registered.
        description: Description forwarded to ``DATA_STORE.add_file``.

    Returns:
        The list of sub keys (relative to ``root_key``) that were added.
    """
    new_keys: List[str] = []
    DATA_STORE.create_key(root_key, '', force=True)
    for root, _, filenames in os.walk(data_path):
        for filename in filenames:
            if filename.endswith(".zip"):
                continue
            file_path = os.path.join(root, filename)
            # relpath is robust against a trailing separator on data_path and
            # against platform-specific separators, unlike counting the
            # '/'-delimited segments of data_path.
            relative_path = os.path.relpath(file_path, data_path)
            relative_path = relative_path.replace(os.sep, '/')
            # Strip only the extension of the file name itself; a dot in a
            # directory name must not truncate the key.
            key = os.path.splitext(relative_path)[0]
            new_keys.append(key)
            DATA_STORE.add_file(os.path.join(root_key, key),
                                file_path,
                                description,
                                force=True)
    return new_keys
Beispiel #3
0
def maybe_download_and_store_single_file(url: str,
                                         key: str,
                                         description: str = None,
                                         postprocess=None,
                                         **kwargs) -> str:
    """Download ``url`` and register it under ``key`` unless already valid.

    When ``DATA_STORE.is_valid(key)`` is true nothing is done. Otherwise the
    file is fetched with ``maybe_download`` into the data store's working
    directory, using the last '/'-separated segment of ``url`` as the file
    name, and the resulting path is added to the data store.

    Args:
        url: Location of the file to download.
        key: Data store key to register the file under.
        description: Description forwarded to ``DATA_STORE.add_file``.
        postprocess: Optional value forwarded to ``maybe_download`` as its
            ``postprocess`` keyword argument.
        **kwargs: Forwarded to ``maybe_download`` only when ``postprocess``
            is given; ignored otherwise.

    Returns:
        ``key``, unchanged.
    """
    if DATA_STORE.is_valid(key):
        return key

    # The download target is named after the final URL segment.
    file_name = url.rsplit('/', 1)[-1]
    if postprocess is not None:
        local_path = maybe_download(file_name,
                                    url,
                                    DATA_STORE.working_directory,
                                    postprocess=postprocess,
                                    **kwargs)
    else:
        local_path = maybe_download(file_name,
                                    url,
                                    DATA_STORE.working_directory)

    DATA_STORE.add_file(key, local_path, description, force=True)
    return key
Beispiel #4
0
def maybe_download_and_store_google_drive(file_pair: Dict[str, str],
                                          root_key: str,
                                          description: str = None,
                                          force_download: bool = False,
                                          use_subkeys=True,
                                          **kwargs) -> List[str]:
    """Download Google Drive files and register them under ``root_key``.

    Unless ``force_download`` is set, the work is skipped when ``root_key``
    is already valid in ``DATA_STORE`` and ``validate_subkeys`` succeeds; in
    that case the list handed to ``validate_subkeys`` is returned
    (presumably filled by it with the existing keys -- confirm against its
    implementation).

    Otherwise each entry of ``file_pair`` is downloaded into the data store's
    working directory, passed through ``post_process`` and registered:

    * result is a directory and ``use_subkeys`` is true: every contained
      file is registered via ``register_to_datastore``;
    * result is a directory and ``use_subkeys`` is false: the folder is
      registered as a whole under ``root_key/<file name before ".zip">``;
    * result is a file: it is registered under
      ``root_key/<file name before the first ".">``.

    Args:
        file_pair: Mapping of file name to Google Drive file id.
        root_key: Data store key under which everything is registered.
        description: Description forwarded to the data store.
        force_download: Skip the validity check and download again.
        use_subkeys: Register the files inside a directory individually
            instead of registering the directory as a single folder.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The keys collected during registration followed by ``root_key``, or
        the previously validated keys when nothing had to be downloaded.
    """
    old_keys: List[str] = []
    if (not force_download and DATA_STORE.is_valid(root_key)
            and validate_subkeys(root_key, old_keys)):
        return old_keys

    keys: List[str] = []
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    for file_name, file_id in file_pair.items():
        log_message("Downloading " + file_name)
        file_dest = os.path.join(DATA_STORE.working_directory, file_name)
        data_path = maybe_download_google_drive(file_id,
                                                file_dest,
                                                force_download=force_download)
        data_path = post_process(data_path)
        # The separating space before "to" was missing in the message.
        log_message("Decompressed " + file_name + " to " + data_path)
        if os.path.isdir(data_path):
            if use_subkeys:
                keys.extend(
                    register_to_datastore(data_path, root_key, description))
            else:
                data_key = os.path.join(root_key, file_name.split(".zip")[0])
                DATA_STORE.add_folder(data_key, data_path, force=True)
                keys.append(data_key)
        else:
            file_key = os.path.join(root_key, file_name.split(".")[0])
            DATA_STORE.add_file(file_key, data_path, description, force=True)
            keys.append(file_key)
        log_message("Completed " + file_name)

    # The root key is created again after all files have been registered,
    # exactly as before the loop.
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    return keys + [root_key]