Esempio n. 1
0
def parse_input(input_file_or_name):
    """Turn a user-supplied input into a data set name and description.

    Three kinds of input are handled:

    * A string ending in ``.json``: the file is loaded as the data set
      dictionary. Unless the dictionary has a ``"URLs"`` key, its
      ``"values"`` entry (required) and ``"labels"`` entry (optional) are
      joined onto the directory containing the JSON file, so relative
      paths are interpreted relative to that file (``os.path.join`` leaves
      absolute paths unchanged).
    * A path to an existing file: a dictionary with the keys ``"values"``
      (the path) and ``"format"`` (the file extension without its first
      character -- presumably the leading dot -- or ``None`` if there is
      no extension) is built.
    * Anything else: the input is treated as a data set name and passed
      through ``normalise_string``; no dictionary is produced.

    Args:
        input_file_or_name: Path to a JSON file, path to a data file, or
            a data set name.

    Returns:
        A ``(name, data_set_dictionary)`` tuple, where
        ``data_set_dictionary`` is ``None`` when the input is only a name.

    Raises:
        KeyError: If a JSON file has neither a ``"URLs"`` nor a
            ``"values"`` entry.
    """

    if input_file_or_name.endswith(".json"):
        json_path = input_file_or_name

        # JSON text is UTF-8 by specification, so do not rely on the
        # platform's default encoding when reading it.
        with open(json_path, "r", encoding="utf-8") as json_file:
            data_set_dictionary = json.load(json_file)

        if "URLs" not in data_set_dictionary:
            if "values" not in data_set_dictionary:
                raise KeyError("Missing path or URL to values.")

            # Paths in the JSON file are given relative to the file itself.
            json_directory = os.path.dirname(json_path)
            for key in ("values", "labels"):
                if key in data_set_dictionary:
                    data_set_dictionary[key] = os.path.join(
                        json_directory, data_set_dictionary[key])

        name = _base_name(json_path)

    elif os.path.isfile(input_file_or_name):
        file_path = input_file_or_name
        file_extension = extension(os.path.basename(file_path))
        data_format = file_extension[1:] if file_extension else None
        name = _base_name(file_path)
        data_set_dictionary = {"values": file_path, "format": data_format}

    else:
        name = normalise_string(input_file_or_name)
        data_set_dictionary = None

    return name, data_set_dictionary
Esempio n. 2
0
def acquire_data_set(title, urls, directory):
    """Make sure the files of a data set are present in ``directory``.

    For every non-empty entry in ``urls`` a target path is built from the
    normalised ``title``, outer key and inner key (joined by hyphens)
    followed by the extension of the last path component of the URL. If no
    file exists at the target path yet, the source is copied when it is an
    existing local file and downloaded otherwise.

    Args:
        title: Title of the data set; used in the target file names.
        urls: Two-level mapping ``{values_or_labels: {kind: url}}``. A
            falsy ``url`` marks an entry that is not available. If
            ``urls`` itself is falsy, nothing is done.
        directory: Directory in which the files are stored; created if
            needed.

    Returns:
        A mapping with the same two-level structure as ``urls`` whose
        leaves are the target paths, or ``None`` for entries without a
        URL. An empty dictionary is returned when ``urls`` is falsy.

    Raises:
        Exception: If a target file is missing and its URL starts with
            ``"."``; such files have to be put in place manually.
    """

    paths = {}

    if not urls:
        return paths

    # ``exist_ok`` avoids the race between checking for the directory and
    # creating it when several processes prepare the same data set.
    os.makedirs(directory, exist_ok=True)

    for values_or_labels, kind_urls in urls.items():
        paths[values_or_labels] = {}

        for kind, url in kind_urls.items():

            if not url:
                paths[values_or_labels][kind] = None
                continue

            url_filename = os.path.split(url)[-1]
            file_extension = extension(url_filename)

            filename = "-".join(
                map(normalise_string, [title, values_or_labels, kind]))
            path = os.path.join(directory, filename) + file_extension

            paths[values_or_labels][kind] = path

            # Nothing to fetch if the file is already in place.
            if os.path.isfile(path):
                continue

            if url.startswith("."):
                raise Exception(
                    "Data set file have to be manually placed in "
                    "correct folder.")

            if os.path.isfile(url):
                print("Copying {} for {} set.".format(
                    values_or_labels, kind))
                start_time = time()

                copy_file(url, path)

                duration = time() - start_time
                print("Data set copied ({}).".format(
                    format_duration(duration)))
                print()

            else:
                print("Downloading {} for {} set.".format(
                    values_or_labels, kind))
                start_time = time()

                download_file(url, path)

                duration = time() - start_time
                print("Data set downloaded ({}).".format(
                    format_duration(duration)))
                print()

    return paths