def __store_data(self):
    """Store every dataset file as ``<file_key>.fastq`` and record its path.

    For each key in ``self.content["data"]``:

    * If ``self.content["method"]`` equals
      ``self.constants["dataset"]["URL"]``, the entry's ``"name"`` is used
      as a URL and fetched with ``file_utils.download``.
    * Otherwise the matching object in ``self.files`` is written with its
      ``save`` method and its ``filename`` is stored back into the entry's
      ``"name"``.

    A source whose name ends in ``.gz`` is written to the target path with
    ``.gz`` appended and then handed to ``file_utils.unzip``. In every case
    the entry's ``"path"`` is set to the ``.fastq`` path.
    """
    gz_suffix = ".gz"

    def store(source_name, fastq_path, write_to):
        """Write one source via *write_to* and unzip it when it is gzipped."""
        compressed = source_name.endswith(gz_suffix)
        target = fastq_path + gz_suffix if compressed else fastq_path
        write_to(target)
        if compressed:
            # Presumably leaves the decompressed data at fastq_path -- confirm
            # against file_utils.unzip.
            file_utils.unzip(target)

    entries = self.content["data"]
    for key in entries:
        entry = entries[key]
        fastq_path = self.directory + key + ".fastq"

        if self.content["method"] == self.constants["dataset"]["URL"]:
            url = entry["name"]
            store(url, fastq_path,
                  lambda target: file_utils.download(url, target))
        else:
            upload = self.files[key]
            upload_name = upload.filename
            store(upload_name, fastq_path, upload.save)
            # Only uploads overwrite the name; URL entries keep their URL.
            entry["name"] = upload_name

        entry["path"] = fastq_path
def get_p_falciparum(genome_id, file_path):
    """Download the malaria archive and move its genome FASTA to *file_path*.

    The archive is downloaded into ``reference_directory``, unpacked with
    ``file_utils.unzip``, and ``genome_sequence_pfal.fa`` from the unpacked
    directory is renamed to *file_path*. The archive and the unpacked
    directory are deleted afterwards.

    Args:
        genome_id: Identifier used only in the progress message.
        file_path: Destination path for the genome FASTA file.
    """
    archive_path = reference_directory + "malaria.tar.bz2"
    archive_url = "http://bp1.s3.amazonaws.com/malaria.tar.bz2"
    file_utils.download(archive_url, archive_path)

    print("Unzipping {}...".format(genome_id), flush=True)
    # The return value of unzip is used as the path of the unpacked directory.
    extracted_directory = file_utils.unzip(archive_path)
    genome_fasta = extracted_directory + "/genome_sequence_pfal.fa"
    os.rename(genome_fasta, file_path)

    # Clean up everything except the FASTA that was just moved out.
    for leftover in (archive_path, extracted_directory):
        file_utils.delete(leftover)
def get_human_genome(genome_id, file_path):
    """Download a UCSC ``.2bit`` genome and convert it to FASTA at *file_path*.

    The ``.2bit`` file is downloaded next to *file_path* (``file_path`` plus
    ``".2bit"``), converted with the ``twoBitToFa`` binary located in
    ``reference_directory``, and then deleted.

    The download path is appended to ``started_tasks`` before the download
    and to ``finished_tasks`` after it.

    Args:
        genome_id: UCSC assembly name; used in the download URL and in the
            progress message.
        file_path: Destination path of the resulting FASTA file.

    Raises:
        subprocess.CalledProcessError: If ``chmod`` or ``twoBitToFa`` exits
            with a non-zero status. The downloaded ``.2bit`` file is left in
            place in that case so the conversion can be retried.
    """
    # Local import keeps this block self-contained.
    import subprocess

    url = "http://hgdownload.soe.ucsc.edu/goldenPath/"
    url += "{0}/bigZips/{0}.2bit".format(genome_id)
    two_bit_path = file_path + ".2bit"

    started_tasks.append(two_bit_path)
    file_utils.download(url, two_bit_path)
    finished_tasks.append(two_bit_path)

    # Convert .2bit file to .fa
    print("Extracting {} from 2bit file...".format(genome_id), flush=True)
    converter = reference_directory + "twoBitToFa"
    # Argument lists are passed without a shell, so paths containing spaces
    # or shell metacharacters are handed over verbatim instead of being
    # re-parsed. check=True surfaces a failed step instead of silently
    # deleting the only copy of the downloaded data below.
    subprocess.run(["chmod", "+x", converter], check=True)
    subprocess.run([converter, two_bit_path, file_path], check=True)

    file_utils.delete(two_bit_path)
def get_file(file_id, direction, directory):
    """Download one ENCODE FASTQ file, unzip it and rename it by direction.

    The gzipped file ``<file_id>.fastq.gz`` is downloaded into *directory*,
    passed to ``file_utils.unzip`` and deleted. The resulting
    ``<file_id>.fastq`` is then renamed to *direction* followed by
    ``fastq_file_ending``.

    Args:
        file_id: ENCODE file identifier used in the URL and file names.
        direction: Read direction label; used in progress messages and as
            the stem of the final file name.
        directory: Directory that receives the file (no trailing slash is
            added or removed).

    Returns:
        A tuple ``(original_name, file_destination)``: the unzipped file's
        original name and the path it was renamed to.
    """
    print("Downloading {} file...".format(direction), flush=True)
    archive_name = "{}.fastq.gz".format(file_id)
    archive_path = "/".join((directory, archive_name))
    file_utils.download(
        "https://www.encodeproject.org/files/{}/@@download/{}".format(
            file_id, archive_name),
        archive_path,
    )

    print("Unzipping {} file...".format(direction), flush=True)
    file_utils.unzip(archive_path)
    file_utils.delete(archive_path)

    # Rename the unpacked <file_id>.fastq to the direction-based name.
    fastq_name = "{}.fastq".format(file_id)
    renamed_path = "{}/{}{}".format(directory, direction, fastq_file_ending)
    os.rename("{}/{}".format(directory, fastq_name), renamed_path)
    return fastq_name, renamed_path
def get_baruzzo(dataset, directory):
    """Download a Baruzzo dataset archive and lay its files out in *directory*.

    Steps, in order:

    1. Download ``<dataset["file_name"]>.tar.bz2`` into *directory* and pass
       it to ``file_utils.unzip``.
    2. Move every non-directory entry of *directory* except the archive into
       a ``beers/`` subdirectory.
    3. Rename the forward and reverse ``<id>.<direction>.fa`` files back into
       *directory* as ``<direction>`` plus ``fastq_file_ending``.
    4. Rename ``<id>.cig`` to ``truth.cig`` in *directory*.
    5. Delete the archive and the ``beers/`` subdirectory.
    6. Pass the dataset description to ``write_dataset_json``.

    Args:
        dataset: Mapping providing the ``"file_name"``, ``"name"`` and
            ``"id"`` entries read here.
        directory: Target directory path, without trailing slash.
    """
    archive_name = "{}.tar.bz2".format(dataset["file_name"])
    archive_url = "http://bp1.s3.amazonaws.com/{}".format(archive_name)
    archive_path = "/".join((directory, archive_name))
    file_utils.download(archive_url, archive_path)

    print("Unzipping {}...".format(dataset["name"]), flush=True)
    file_utils.unzip(archive_path)

    # Move files to /beers directory
    staging_directory = directory + "/beers/"
    file_utils.create_directory(staging_directory)
    for entry_name in os.listdir(directory):
        entry_path = directory + "/" + entry_name
        # Leave subdirectories and the archive itself where they are.
        if os.path.isdir(entry_path) or entry_path == archive_path:
            continue
        shutil.move(entry_path, staging_directory + entry_name)

    # Move FASTQ files to root and rename
    def place_reads(direction):
        """Rename one direction's reads file out of staging; return names."""
        source_name = "{}.{}.fa".format(dataset["id"], direction)
        destination = "{}/{}{}".format(
            directory, direction, fastq_file_ending)
        os.rename(staging_directory + source_name, destination)
        return source_name, destination

    forward_key = constants["dataset"]["FORWARD"]
    forward_file_name, forward_file_path = place_reads(forward_key)
    reverse_key = constants["dataset"]["REVERSE"]
    reverse_file_name, reverse_file_path = place_reads(reverse_key)

    # Move CIG file to root and rename
    truth_file_name = "{}.cig".format(dataset["id"])
    truth_file_path = directory + "/truth.cig"
    os.rename(staging_directory + truth_file_name, truth_file_path)

    for leftover in (archive_path, staging_directory):
        file_utils.delete(leftover)

    reads = {
        forward_key: {
            "name": forward_file_name,
            "path": forward_file_path,
        },
        reverse_key: {
            "name": reverse_file_name,
            "path": reverse_file_path,
        },
    }
    evaluation = {
        "type": "beers",
        "truth_file": {
            "name": truth_file_name,
            "path": truth_file_path,
        },
    }
    write_dataset_json({
        "id": dataset["id"],
        "name": dataset["name"],
        "readLength": "100",
        "data": reads,
        "evaluation": evaluation,
    })