Example #1
 def __execute_step(self, action, experiment):
     action_handler_id = experiment.get("pipeline")[action]["id"]
     action_handler = next(
         (service
          for service in self.services if service.id == action_handler_id),
         None)
     file_path = self.data_handler.cache.lookup(experiment, action)
     if file_path:  # Step is cached
         # Check if evaluation was run, maybe run evaluation
         evaluation_handler = self.__evaluation_handler(experiment, action)
         has_evaluation = next((file for file in os.listdir(file_path)
                                if file.startswith("Evaluation")), False)
         if evaluation_handler and not has_evaluation:
             experiment.start_action(action)
             experiment.mark_cached(action)
             self.__run_evaluation(file_path, experiment,
                                   evaluation_handler, action_handler)
             experiment.complete_action(action)
         else:
             experiment.start_action(action, cached=True)
     else:  # Run step
         experiment.start_action(action)
         file_path = self.data_handler.cache.create_path(experiment, action)
         file_utils.create_directory(file_path)
         action_handler.run({
             "docker_client": self.docker_client,
             "data_handler": self.data_handler,
             "experiment": experiment,
             "destination": file_path
         })
         self.__run_evaluation_if_specified(file_path, experiment, action,
                                            action_handler)
         experiment.complete_action(action)
     experiment.add_download(action, file_path)
     return experiment
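The method above follows a cache-or-run pattern: a cache lookup decides whether a step's existing output directory is reused or the step is executed into a freshly created one. Below is a minimal, self-contained sketch of that pattern; StepCache and run_step are hypothetical names for illustration, not part of the code above.

import os

class StepCache:
    """Maps an (experiment_id, action) pair to its output directory, if cached."""

    def __init__(self, root):
        self.root = root

    def lookup(self, experiment_id, action):
        # Return the cached output directory, or None if the step has not run yet.
        path = os.path.join(self.root, experiment_id, action)
        return path if os.path.isdir(path) else None

    def create_path(self, experiment_id, action):
        # Create (and return) a fresh output directory for the step.
        path = os.path.join(self.root, experiment_id, action)
        os.makedirs(path, exist_ok=True)
        return path


def run_step(cache, experiment_id, action, runner):
    # Reuse the cached result when present; otherwise run the step into a new directory.
    file_path = cache.lookup(experiment_id, action)
    if file_path is None:
        file_path = cache.create_path(experiment_id, action)
        runner(file_path)
    return file_path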
Example #2
def get_datasets():
    for dataset in rna_seq_data:
        if ONLY_SIMULATED and not dataset["id"].startswith("simulated"):
            print("Skipping {} (only simulated)".format(dataset["name"]))
            continue
        if ONLY_GIAB and dataset["id"] != "GM12878":
            print("Skipping {} (only giab)".format(dataset["name"]))
            continue
        dataset_directory = datasets_directory + dataset["id"]
        dataset_getter = dataset["getter"]
        if not os.path.isdir(dataset_directory):
            file_utils.create_directory(dataset_directory)
            log_task_start(dataset["name"], dataset_directory)
            dataset_getter(dataset, dataset_directory)
            log_task_end(dataset["name"], dataset_directory)
        else:
            log_data_present(dataset["name"])
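get_datasets drives a registry of dataset descriptors in which each entry's "getter" field is the callable that downloads and prepares that dataset (get_baruzzo in Example #4 is one such getter). The sketch below shows how such a registry could be wired up; the field names mirror the ones used above, but the concrete entries and the get_simulated helper are illustrative assumptions.

def get_simulated(dataset, directory):
    # Hypothetical getter: download and unpack one dataset into `directory`.
    print("Fetching {} into {}".format(dataset["name"], directory), flush=True)

rna_seq_data = [
    {
        "id": "simulated_t1r1",
        "name": "Baruzzo T1R1 (simulated)",
        "file_name": "human_t1r1",
        "getter": get_simulated,
    },
    {
        "id": "GM12878",
        "name": "GIAB GM12878",
        "file_name": "gm12878",
        "getter": get_simulated,
    },
]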
Example #3
 def prepare_indexing(self, parameters):
     file_utils.create_directory(parameters["genome_index_path"])
Example #4
def get_baruzzo(dataset, directory):
    zip_name = "{}.tar.bz2".format(dataset["file_name"])
    url = "http://bp1.s3.amazonaws.com/{}".format(zip_name)
    download_path = directory + "/" + zip_name
    file_utils.download(url, download_path)

    print("Unzipping {}...".format(dataset["name"]), flush=True)
    file_utils.unzip(download_path)

    # Move files to /beers directory
    beers_directory = directory + "/beers/"
    file_utils.create_directory(beers_directory)
    for file_name in os.listdir(directory):
        file_path = directory + "/" + file_name
        if not os.path.isdir(file_path) and file_path != download_path:
            shutil.move(file_path, beers_directory + file_name)

    # Move FASTQ files to root and rename
    def setup_file(direction):
        file_name = "{}.{}.fa".format(dataset["id"], direction)
        file_origin = beers_directory + file_name
        file_destination = "{}/{}{}".format(directory, direction,
                                            fastq_file_ending)
        os.rename(file_origin, file_destination)
        return file_name, file_destination

    forward_file_name, forward_file_path = setup_file(
        constants["dataset"]["FORWARD"])
    reverse_file_name, reverse_file_path = setup_file(
        constants["dataset"]["REVERSE"])

    # Move CIG file to root and rename
    truth_file_name = "{}.cig".format(dataset["id"])
    truth_file_path = directory + "/truth.cig"
    os.rename(beers_directory + truth_file_name, truth_file_path)

    file_utils.delete(download_path)
    file_utils.delete(beers_directory)

    write_dataset_json({
        "id": dataset["id"],
        "name": dataset["name"],
        "readLength": "100",
        "data": {
            constants["dataset"]["FORWARD"]: {
                "name": forward_file_name,
                "path": forward_file_path,
            },
            constants["dataset"]["REVERSE"]: {
                "name": reverse_file_name,
                "path": reverse_file_path,
            }
        },
        "evaluation": {
            "type": "beers",
            "truth_file": {
                "name": truth_file_name,
                "path": truth_file_path
            }
        }
    })
Example #5
        if not os.path.isdir(dataset_directory):
            file_utils.create_directory(dataset_directory)
            log_task_start(dataset["name"], dataset_directory)
            dataset_getter(dataset, dataset_directory)
            log_task_end(dataset["name"], dataset_directory)
        else:
            log_data_present(dataset["name"])


###################
# SCRIPT EXECUTION
###################

print("", flush=True)
print("Downloading data", flush=True)
print("", flush=True)

file_utils.create_directory(reference_directory)
file_utils.create_directory(datasets_directory)

try:
    get_tools()
    get_genomes()
    get_datasets()
    remove_tools()
finally:
    for path in started_tasks:
        if path not in finished_tasks:
            print("An error occurred, deleting {}".format(path))
            file_utils.delete(path)
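The try/finally block relies on log_task_start and log_task_end recording which output directories were started and which completed, so that a failure part-way through a download removes the incomplete directory. Those helpers are not shown in the excerpt; the sketch below is an assumption about how they could maintain the two collections, based only on how the lists are used above.

started_tasks = []
finished_tasks = []

def log_task_start(name, path):
    print("Fetching {} into {}".format(name, path), flush=True)
    started_tasks.append(path)

def log_task_end(name, path):
    print("Finished {}".format(name), flush=True)
    finished_tasks.append(path)

def log_data_present(name):
    print("{} already present, skipping download".format(name), flush=True)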
Example #6
 def __init__(self, directory, Instance):
     self.directory = directory
     self.Instance = Instance
     file_utils.create_directory(self.directory)
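Every example above funnels directory creation through file_utils.create_directory, and several also call file_utils.delete. Those helpers are not part of the excerpts; judging purely by how they are used, they most likely wrap os.makedirs and shutil.rmtree. The following is a minimal sketch under that assumption, not the project's actual implementation.

import os
import shutil

def create_directory(path):
    # Create the directory (and any missing parents); do nothing if it already exists.
    os.makedirs(path, exist_ok=True)

def delete(path):
    # Remove a file or an entire directory tree; ignore paths that are already gone.
    if os.path.isdir(path):
        shutil.rmtree(path, ignore_errors=True)
    elif os.path.exists(path):
        os.remove(path)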