Ejemplo n.º 1
0
    def __store_data(self):
        """Store every file listed in ``self.content["data"]`` as FASTQ.

        Each entry ends up registered under the path
        ``self.directory + file_key + ".fastq"``. When
        ``self.content["method"]`` equals the URL constant, the entry's
        ``"name"`` is treated as a URL and downloaded; otherwise the
        matching object in ``self.files`` is saved and its filename is
        recorded as the entry's ``"name"``.

        Sources whose name ends in ``.gz`` are written with an extra
        ``.gz`` suffix and then handed to ``file_utils.unzip``
        (presumably this leaves the plain ``.fastq`` file behind --
        confirm against ``file_utils``).
        """
        def store(source_name, fastq_path, write_to):
            # Decide on the on-disk name first, write the file there and
            # only unzip when the source was gzip-compressed.
            gzipped = source_name.endswith(".gz")
            stored_path = fastq_path + ".gz" if gzipped else fastq_path
            write_to(stored_path)
            if gzipped:
                file_utils.unzip(stored_path)

        for file_key in self.content["data"]:
            entry = self.content["data"][file_key]
            fastq_path = self.directory + file_key + ".fastq"
            if self.content["method"] == self.constants["dataset"]["URL"]:
                url = entry["name"]
                store(url, fastq_path,
                      lambda target: file_utils.download(url, target))
            else:
                upload = self.files[file_key]
                uploaded_name = upload.filename
                store(uploaded_name, fastq_path, upload.save)
                entry["name"] = uploaded_name
            # The registered path is always the uncompressed one.
            entry["path"] = fastq_path
Ejemplo n.º 2
0
def get_p_falciparum(genome_id, file_path):
    """Fetch the P. falciparum reference genome and place it at *file_path*.

    Downloads ``malaria.tar.bz2`` into ``reference_directory``, unpacks
    it, moves ``genome_sequence_pfal.fa`` out of the unpacked directory
    to *file_path*, and finally deletes both the archive and the
    unpacked directory.

    *genome_id* is only used in the progress message.
    """
    archive_path = reference_directory + "malaria.tar.bz2"
    file_utils.download(
        "http://bp1.s3.amazonaws.com/malaria.tar.bz2", archive_path)

    print("Unzipping {}...".format(genome_id), flush=True)
    # unzip() is expected to return the directory it extracted into.
    extracted_directory = file_utils.unzip(archive_path)
    genome_source = extracted_directory + "/genome_sequence_pfal.fa"
    os.rename(genome_source, file_path)

    # Clean up everything except the genome file itself.
    for leftover in (archive_path, extracted_directory):
        file_utils.delete(leftover)
Ejemplo n.º 3
0
def get_human_genome(genome_id, file_path):
    """Download a human genome build and convert it to FASTA.

    The ``.2bit`` file for *genome_id* is downloaded from the UCSC
    goldenPath server to ``file_path + ".2bit"``. That path is appended
    to ``started_tasks`` before and to ``finished_tasks`` after the
    download. The ``twoBitToFa`` tool located in ``reference_directory``
    is then made executable and run to write the FASTA output to
    *file_path*, after which the ``.2bit`` file is deleted.

    The tool is invoked with argument lists rather than through a shell,
    so paths containing spaces or shell metacharacters are passed through
    verbatim. As before, a failing conversion does not raise; it is
    reported on stdout instead of being silently ignored.
    """
    import subprocess

    url = "http://hgdownload.soe.ucsc.edu/goldenPath/"
    url += "{0}/bigZips/{0}.2bit".format(genome_id)
    two_bit_path = file_path + ".2bit"
    started_tasks.append(two_bit_path)
    file_utils.download(url, two_bit_path)
    finished_tasks.append(two_bit_path)

    # Convert .2bit file to .fa
    print("Extracting {} from 2bit file...".format(genome_id), flush=True)
    converter = reference_directory + "twoBitToFa"
    try:
        # Same chain as `chmod +x tool && tool in out`: the converter only
        # runs when chmod succeeded.
        exit_code = subprocess.run(["chmod", "+x", converter]).returncode
        if exit_code == 0:
            exit_code = subprocess.run(
                [converter, two_bit_path, file_path]).returncode
    except OSError as error:
        # The executable could not be started at all.
        print("Could not run {}: {}".format(converter, error), flush=True)
    else:
        if exit_code != 0:
            print("Extracting {} failed with exit code {}".format(
                genome_id, exit_code), flush=True)
    file_utils.delete(two_bit_path)
Ejemplo n.º 4
0
    def get_file(file_id, direction, directory):
        """Download one ENCODE FASTQ file and rename it by read direction.

        The gzipped file ``<file_id>.fastq.gz`` is downloaded from
        encodeproject.org into *directory*, unzipped, and the archive is
        deleted. The resulting ``<file_id>.fastq`` is then renamed to
        ``<directory>/<direction><fastq_file_ending>``.

        Returns a tuple ``(original_name, file_destination)`` holding the
        original FASTQ file name and the final path.
        """
        print("Downloading {} file...".format(direction), flush=True)
        archive_name = "{}.fastq.gz".format(file_id)
        archive_path = "/".join((directory, archive_name))
        file_utils.download(
            "https://www.encodeproject.org/files/{}/@@download/{}".format(
                file_id, archive_name),
            archive_path)

        print("Unzipping {} file...".format(direction), flush=True)
        file_utils.unzip(archive_path)
        file_utils.delete(archive_path)

        # The unzipped file is expected next to the archive, minus ".gz".
        original_name = "{}.fastq".format(file_id)
        file_destination = "{}/{}{}".format(directory, direction,
                                            fastq_file_ending)
        os.rename("{}/{}".format(directory, original_name), file_destination)
        return original_name, file_destination
Ejemplo n.º 5
0
def get_baruzzo(dataset, directory):
    """Download a Baruzzo benchmark dataset and register it.

    *dataset* must provide the keys ``"file_name"``, ``"name"`` and
    ``"id"``. The archive ``<file_name>.tar.bz2`` is downloaded into
    *directory* and unpacked (presumably into *directory* itself --
    confirm against ``file_utils.unzip``). The forward and reverse read
    files and the ``.cig`` truth file are renamed into *directory*, all
    remaining files are discarded together with the archive, and the
    dataset description is passed to ``write_dataset_json``.
    """
    archive_name = "{}.tar.bz2".format(dataset["file_name"])
    archive_path = directory + "/" + archive_name
    file_utils.download(
        "http://bp1.s3.amazonaws.com/{}".format(archive_name), archive_path)

    print("Unzipping {}...".format(dataset["name"]), flush=True)
    file_utils.unzip(archive_path)

    # Collect every plain file (except the archive) in a /beers staging
    # directory so that leftovers can be removed in one go afterwards.
    staging_directory = directory + "/beers/"
    file_utils.create_directory(staging_directory)
    for entry_name in os.listdir(directory):
        entry_path = directory + "/" + entry_name
        if os.path.isdir(entry_path) or entry_path == archive_path:
            continue
        shutil.move(entry_path, staging_directory + entry_name)

    # Move the forward and reverse read files back to the root, renamed
    # after their direction.
    read_files = {}
    for direction_key in ("FORWARD", "REVERSE"):
        direction = constants["dataset"][direction_key]
        staged_name = "{}.{}.fa".format(dataset["id"], direction)
        read_path = "{}/{}{}".format(directory, direction,
                                     fastq_file_ending)
        os.rename(staging_directory + staged_name, read_path)
        read_files[direction] = {
            "name": staged_name,
            "path": read_path,
        }

    # Move CIG file to root and rename
    truth_name = "{}.cig".format(dataset["id"])
    truth_path = directory + "/truth.cig"
    os.rename(staging_directory + truth_name, truth_path)

    for leftover in (archive_path, staging_directory):
        file_utils.delete(leftover)

    description = {
        "id": dataset["id"],
        "name": dataset["name"],
        "readLength": "100",
        "data": read_files,
        "evaluation": {
            "type": "beers",
            "truth_file": {
                "name": truth_name,
                "path": truth_path
            }
        }
    }
    write_dataset_json(description)