예제 #1
0
    def __init__(self, *args, **kwrds):
        """Initialise the step's chunk bookkeeping and local chunk directory.

        All positional and keyword arguments are forwarded unchanged to
        ``PipelineStep.__init__``.

        Raises:
            KeyError: if ``additional_attributes`` has no ``chunks_in_flight``
                entry.
            OSError: if the local chunk result directory cannot be created.
        """
        PipelineStep.__init__(self, *args, **kwrds)
        # Thread-level semaphore whose initial count comes from the step's
        # "chunks_in_flight" attribute.
        self.chunks_in_flight = threading.Semaphore(self.additional_attributes['chunks_in_flight'])
        self.chunks_result_dir_local = os.path.join(self.output_dir_local, "chunks")
        self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, "chunks")
        # Process-level semaphore sized by MAX_CONCURRENT_CHUNK_UPLOADS
        # (presumably throttles concurrent chunk uploads -- confirm at use site).
        self.iostream_upload = multiprocessing.Semaphore(MAX_CONCURRENT_CHUNK_UPLOADS)

        # Create the directory in-process instead of shelling out to
        # "mkdir -p <path>": the unquoted interpolation broke on paths that
        # contain spaces or shell metacharacters. exist_ok=True keeps the
        # "no error if it already exists" semantics of `mkdir -p`.
        os.makedirs(self.chunks_result_dir_local, exist_ok=True)
예제 #2
0
 def __init__(self, *args, **kwrds):
     """Configure the alignment step and create its local chunk directory.

     All positional and keyword arguments are forwarded unchanged to
     ``PipelineStep.__init__``. The alignment algorithm is read from
     ``additional_attributes`` and must be "gsnap" or "rapsearch2".
     """
     PipelineStep.__init__(self, *args, **kwrds)
     attrs = self.additional_attributes
     # TODO: (tmorse) remove service compatibility https://jira.czi.team/browse/IDSEQ-2568
     # The "service" key is only the fallback for a missing
     # "alignment_algorithm" key.
     legacy_algorithm = attrs.get("service")
     self.alignment_algorithm = attrs.get("alignment_algorithm", legacy_algorithm)
     supported_algorithms = ("gsnap", "rapsearch2")
     assert self.alignment_algorithm in supported_algorithms
     self.chunks_in_flight_semaphore = threading.Semaphore(MAX_CHUNKS_IN_FLIGHT)
     # Local and S3 chunk results both live in a "chunks" subdirectory of
     # their respective output directories.
     chunk_subdir = "chunks"
     self.chunks_result_dir_local = os.path.join(self.output_dir_local, chunk_subdir)
     self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, chunk_subdir)
     self.batch_job_desc_bucket = get_batch_job_desc_bucket()
     command.make_dirs(self.chunks_result_dir_local)
 def __init__(self, *args, **kwrds):
     """Configure the alignment step for either a local or a remote run.

     All positional and keyword arguments are forwarded unchanged to
     ``PipelineStep.__init__``. A local run ("run_locally" attribute is
     truthy) must supply an "index" additional file; a remote run must not,
     and only a remote run creates the local chunk result directory.
     """
     PipelineStep.__init__(self, *args, **kwrds)
     attrs = self.additional_attributes
     self.alignment_algorithm = attrs.get("alignment_algorithm")
     supported_algorithms = ("gsnap", "rapsearch2")
     assert self.alignment_algorithm in supported_algorithms
     self.chunks_in_flight_semaphore = threading.Semaphore(MAX_CHUNKS_IN_FLIGHT)
     chunk_subdir = "chunks"
     self.chunks_result_dir_local = os.path.join(self.output_dir_local, chunk_subdir)
     self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, chunk_subdir)
     self.batch_job_desc_bucket = get_batch_job_desc_bucket()
     run_locally = attrs.get("run_locally")
     self.is_local_run = bool(run_locally)
     self.genome_name = attrs.get("genome_name", "nt_k16")
     self.index = self.additional_files.get("index")

     if self.is_local_run:
         # Local runs stop here: they need an index and no chunk directory
         # is created for them.
         assert self.index, "local runs require an index to be passed in"
         return

     assert not self.index, "passing in an index is not supported for remote runs"
     command.make_dirs(self.chunks_result_dir_local)
예제 #4
0
 def fetch_input_files_from_s3(input_files, input_dir_s3, result_dir_local):
     """Fetch each input file from S3 and write a timestamped done-file for it.

     Args:
         input_files: iterable of file paths relative to ``input_dir_s3``.
         input_dir_s3: S3 prefix the files are read from.
         result_dir_local: local directory the files are copied under, with
             the same relative layout.

     Raises:
         RuntimeError: if a fetch returns a falsy result for a file.
     """
     for rel_path in input_files:
         remote_path = os.path.join(input_dir_s3, rel_path)
         local_path = os.path.join(result_dir_local, rel_path)
         target_dir = os.path.dirname(local_path)
         command.make_dirs(target_dir)
         # Copy the file over.
         fetched = idseq_dag.util.s3.fetch_from_s3(remote_path, target_dir, allow_s3mi=True)
         if not fetched:
             raise RuntimeError(f"{remote_path} likely doesn't exist")
         # Write the done-file, whose content is the current UTC time.
         done_marker = PipelineStep.done_file(local_path)
         timestamp = datetime.datetime.now(tz=pytz.UTC).strftime("%a %b %e %H:%M:%S %Z %Y")
         command.write_text_to_file(timestamp, done_marker)
예제 #5
0
 def fetch_input_files_from_s3(input_files, input_dir_s3, result_dir_local):
     """Fetch each input file from S3 and write a done-file next to it.

     Args:
         input_files: iterable of file paths relative to ``input_dir_s3``.
         input_dir_s3: S3 prefix the files are read from.
         result_dir_local: local directory the files are copied under, with
             the same relative layout.

     Raises:
         RuntimeError: if a fetch returns a falsy result for a file.
         OSError: if a local target directory cannot be created.
     """
     # Function-scope import so the block does not rely on a module-level
     # import that may not exist in this file.
     import shlex

     for f in input_files:
         s3_file = os.path.join(input_dir_s3, f)
         local_file = os.path.join(result_dir_local, f)
         local_dir = os.path.dirname(local_file)
         # Create the directory in-process rather than via an unquoted
         # "mkdir -p <path>" shell string, which broke on file names with
         # spaces or shell metacharacters.
         os.makedirs(local_dir, exist_ok=True)
         # copy the file over
         output_file = idseq_dag.util.s3.fetch_from_s3(s3_file,
                                                       local_dir,
                                                       allow_s3mi=True)
         if not output_file:
             raise RuntimeError(f"{s3_file} likely doesn't exist")
         # write the done_file; the redirect target is shell-quoted so the
         # path is always treated as a single literal word.
         done_file = PipelineStep.done_file(local_file)
         command.execute("date > %s" % shlex.quote(done_file))