Example #1
    def __init__(self, id, data):
        super(BaseBuildJob, self).__init__()
        self.id = id
        self.data = data
        self.package_path = os.path.join(root_path, self.id)
        self.package_cache = os.path.join(self.package_path, '.bacon.d')
        self.stored_fingerprint_path = os.path.join(self.package_cache, "fingerprint")
        self.dependency_jobs = []

        make_dir_if_needed(self.package_cache)
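
All of these examples funnel directory creation through the same make_dir_if_needed helper, whose body is not shown on this page. A minimal sketch, assuming it simply wraps os.makedirs and tolerates an existing directory:

import os

def make_dir_if_needed(path):
    # Assumed behavior: create the directory (and any missing parents),
    # and do nothing if it already exists.
    os.makedirs(path, exist_ok=True)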
Example #2
def start_build(sra_id, wait_time, buffer_size_gb, container_type, required_ram_gb, available_ram_gb):
    input_dir = download_dir(sra_id)
    output_dir = build_dir(sra_id)
    num_cores = max(4, round(required_ram_gb / 3.75))
    logging.info(f'[{sra_id}] Starting build from {input_dir} to {output_dir}, buffer={round(buffer_size_gb, 2)}GB '
                 f'on {num_cores} cores')
    util.make_dir_if_needed(build_dir(sra_id))
    log_file_name = os.path.join(build_dir(sra_id), 'build.log')
    log_file = util.TeeLogger(log_file_name)
    write_log_header(log_file, 'build', sra_id, required_ram_gb, available_ram_gb)
    build_processes[sra_id] = (subprocess.Popen(
        ['./build.sh', sra_id, input_dir, output_dir, str(buffer_size_gb), str(num_cores), container_type], stdout=log_file,
        stderr=log_file), time.time(), wait_time, required_ram_gb)
    return True
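
The example above (and the next one) pass a util.TeeLogger as the child process's stdout and stderr. Its implementation is not shown here; since subprocess.Popen needs a real file descriptor, a plausible sketch tees through a pipe. The class name and the optional second argument match the call sites; treating that argument as a line prefix, and everything else below, is an assumption:

import os
import sys
import threading

class TeeLogger:
    """Sketch: duplicate a child process's output into a log file and onto
    our own stdout. Popen(stdout=...) requires an object with a real file
    descriptor, so we expose the write end of a pipe via fileno() and copy
    lines from the read end on a background thread."""

    def __init__(self, log_file_name, prefix=''):
        self.log = open(log_file_name, 'a')
        self.prefix = prefix
        self._read_fd, self._write_fd = os.pipe()
        threading.Thread(target=self._pump, daemon=True).start()

    def fileno(self):
        return self._write_fd  # this is what subprocess.Popen duplicates

    def _pump(self):
        with os.fdopen(self._read_fd) as reader:
            for line in reader:
                sys.stdout.write(f'[{self.prefix}] {line}' if self.prefix else line)
                self.log.write(line)
                self.log.flush()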
Example #3
def start_download(download_resp):
    if 'id' not in download_resp:
        logging.info('No more downloads available. We\'re almost done!')
        return
    sra_id = download_resp['id']
    pending_processes.append(sra_id)
    dump_pending(pending_processes)
    util.make_dir_if_needed(download_dir(sra_id))
    log_file_name = os.path.join(download_dir(sra_id), 'download.log')
    log_file = util.TeeLogger(log_file_name, 'Stage')
    bucket = '0'
    if args.source == 'ncbi':
        if 'bucket' not in download_resp:
            logging.info(f'[{sra_id}] Specified NCBI as download source, but server response has no "bucket" field. '
                         f'Will download via HTTP instead of GCS')
        else:
            bucket = download_resp['bucket']
    download_processes[sra_id] = (
        subprocess.Popen(['./download.sh', args.source, sra_id, download_dir_base(), bucket], stdout=log_file,
                         stderr=log_file), time.time())
    sra_info[sra_id] = (time.time(),)
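
download_dir, download_dir_base, and their build/clean counterparts are used throughout these excerpts but never defined in them. A hypothetical sketch of the layout they imply, assuming everything lives under args.output_dir (the subdirectory names are guesses):

import os

def download_dir_base():
    return os.path.join(args.output_dir, 'downloads')

def download_dir(sra_id):
    # one working directory per SRA accession
    return os.path.join(download_dir_base(), sra_id)

# build_dir/build_dir_base and clean_dir/clean_dir_base would follow the
# same pattern with their own stage subdirectories.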
Example #4
    def __init__(self, id, data):
        super(JavaModuleBuildJob, self).__init__(id, data)
        self.classes_cache_directory = os.path.join(self.package_cache, "classes")
        self.test_classes_cache_directory = os.path.join(self.package_cache, "test-classes")
        self.archive_cache = os.path.join(self.package_cache, "dist")
        self.compile_dependencies = []
        self.test_dependencies = []

        self.parse_dependencies(id, "dependencies", self.compile_dependencies)
        self.parse_dependencies(id, "test-dependencies", self.test_dependencies)

        make_dir_if_needed(self.classes_cache_directory)
        make_dir_if_needed(self.test_classes_cache_directory)
        make_dir_if_needed(self.archive_cache)
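
parse_dependencies is called with a key ("dependencies" or "test-dependencies") and a target list; its body is not shown. A sketch of plausible behavior, based on the group:artifact:version check in parse_java_build_file below (the split between Maven-style artifacts and sibling modules is an assumption):

    def parse_dependencies(self, id, key, target):
        # Assumed: collect the dependencies declared under `key`. Entries
        # shaped like group:artifact:version are external artifacts; any
        # other entry names a sibling module built by another job.
        if key not in self.data:
            return
        for dependency in self.data[key]:
            target.append(dependency)
            if len(dependency.split(':')) != 3:
                self.dependency_jobs.append(dependency)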
Example #5
def check_env():
    """ Make sure all the necessary software is in place to successfully run the client and create working
    directories """

    util.make_dir_if_needed(download_dir_base())
    util.make_dir_if_needed(build_dir_base())
    util.make_dir_if_needed(clean_dir_base())

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S')
    file_handler = logging.FileHandler(f'{args.output_dir}/client.log')
    file_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    file_handler.setFormatter(formatter)
    logging.getLogger().addHandler(file_handler)

    if subprocess.call(['./prereq.sh']) != 0:
        logging.error("Some prerequisites are missing on this machine. Bailing out.")
        exit(1)
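
check_env first calls logging.basicConfig for console output and then attaches a file handler with an identical format. On Python 3.3+ the same setup fits in one call, since basicConfig applies the shared format to any handler that has no formatter of its own. A compact equivalent:

import logging
import sys

logging.basicConfig(
    format='%(asctime)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.DEBUG,
    handlers=[
        logging.StreamHandler(sys.stderr),
        logging.FileHandler(f'{args.output_dir}/client.log'),
    ],
)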
Example #6
def parse_java_build_file(id, data):
    if 'dependencies' in data:
        for dependency in data['dependencies']:
            if len(dependency.split(":")) != 3:
                parse_build_file(dependency)
    return JavaModuleBuildJob(id, data)


root_path = sys.argv[1]

if len(sys.argv) > 2:
    task = sys.argv[2]
else:
    task = "build"

make_dir_if_needed(os.path.expanduser("~/.bacon.d"))
parse_build_file(root_path)

# print "build order:"
# print yaml.dump(build_order)

# print "task: %s" % task

if task == "compile":
    goals = ["compile"]
elif task == "clean":
    goals = ["clean"]
elif task == "test":
    goals = ["compile", "compileTest", "test"]
elif task == "run":
    goals = ["compile", "compileTest", "test", "run"]
Example #7
def check_status():
    global must_quit
    if must_quit:
        return False
    total_reserved_ram_gb = 0  # how much memory all active processes need
    completed_downloads = set()
    for sra_id, (download_process, start_time) in download_processes.items():
        return_code = download_process.poll()
        is_timed_out = (time.time() - start_time) > 120 * 60
        if return_code is not None or is_timed_out:
            if os.path.exists(os.path.join(download_dir(sra_id), 'code')):
                return_code = int(open(os.path.join(download_dir(sra_id), 'code')).read())
            elif is_timed_out:
                logging.warning(f'[{sra_id}] Download timed out after {time.time() - start_time} seconds.')
                return_code = 254
            else:
                logging.error(f'[{sra_id}] Download process did not provide a return code. Assuming error')
                return_code = 255
            completed_downloads.add(sra_id)
            log_file_name = os.path.join(download_dir(sra_id), 'download.log')
            logging.info(f'[{sra_id}] Download finished with output\n {util.tab(open(log_file_name).readlines())}\n\n\n')
            log_new_file_name = os.path.join(clean_dir(sra_id), 'download.log')
            util.make_dir_if_needed(clean_dir(sra_id))
            os.rename(log_file_name, log_new_file_name)

            download_path = download_dir(sra_id)
            sra_dir = os.path.join(download_path, 'sra')
            download_size_mb = util.dir_size_MB(sra_dir)
            size_file = os.path.join(download_dir(sra_id), 'size')
            if os.path.exists(size_file):
                sra_size_mb = round(int(open(size_file).read()) / 1e6, 2)
                logging.info(f'Downloaded sra files have {sra_size_mb}MB')
            else:
                logging.warning('Could not find size file. Reporting size -1')
                sra_size_mb = -1
            subprocess.run(['rm', '-rf', sra_dir])
            kmc_dir = os.path.join(download_path, 'kmc')
            kmc_size_mb = util.dir_size_MB(kmc_dir)
            success = True
            if return_code == 0:
                logging.info(f'[{sra_id}] Download completed successfully.')
                stats_file = os.path.join(download_path, 'stats')
                try:
                    with open(stats_file) as stats:
                        json_resp = json.loads(stats.read())
                    if '#k-mers_coverage' in json_resp and '#k-mers_below_min_threshold' in json_resp:
                        kmer_count_unique = int(json_resp['#Unique_counted_k-mers'])
                        kmer_coverage = int(json_resp['#k-mers_coverage'])
                        kmer_count_singletons = int(json_resp['#k-mers_below_min_threshold'])
                    else:
                        logging.warning(f'[{sra_id}] Invalid KMC stat files, assuming failure')
                        success = False
                except FileNotFoundError:
                    logging.warning(f'[{sra_id}] Could not find KMC stats file {stats_file}, bailing out.')
                    success = False
            else:
                success = False
            if success:
                params = {'id': sra_id, 'time': int(time.time() - start_time), 'size_mb': sra_size_mb,
                          'download_size_mb': download_size_mb, 'kmc_size_mb': kmc_size_mb,
                          'kmer_count_unique': kmer_count_unique, 'kmer_coverage': kmer_coverage,
                          'kmer_count_singletons': kmer_count_singletons}
                sra_info[sra_id] = (
                    *sra_info[sra_id], sra_size_mb, kmer_count_unique, kmer_coverage, kmer_count_singletons)
                ack('download', params)
                waiting_builds[sra_id] = time.time()
            else:
                logging.warning(f'[{sra_id}] Download failed. Removing {download_path}')
                subprocess.run(['rm', '-rf', download_path])
                params = {'id': sra_id, 'time': int(time.time() - start_time), 'size_mb': sra_size_mb,
                          'download_size_mb': download_size_mb, 'kmc_size_mb': kmc_size_mb, 'exit_code': return_code}
                nack('download', params)
        else:
            total_reserved_ram_gb += 2  # approximate 2GB of RAM for each download process (because of KMC)
    for d in completed_downloads:
        del download_processes[d]

    completed_builds = set()
    used_cores = 0
    for sra_id, (build_process, start_time, wait_time, reserved_ram_gb) in build_processes.items():
        return_code = build_process.poll()

        if return_code is not None:
            completed_builds.add(sra_id)
            log_file_name = os.path.join(build_dir(sra_id), 'build.log')
            logging.info(f'[{sra_id}] Build finished with output\n {util.tab(open(log_file_name).readlines())}\n\n\n')
            log_new_file_name = os.path.join(clean_dir(sra_id), 'build.log')
            os.rename(log_file_name, log_new_file_name)

            # clean up the download path; if adding retries, do this only on success
            download_path = download_dir(sra_id)
            logging.info(f'[{sra_id}] Cleaning up {download_path}')
            subprocess.run(['rm', '-rf', download_path])

            build_path = build_dir(sra_id)
            build_size_mb = util.dir_size_MB(build_path)
            if return_code == 0:
                logging.info(f'[{sra_id}] Building graph completed successfully.')
                sanity = check_sanity(sra_id)
                params = {'id': sra_id, 'time': int(time.time() - start_time),
                          'wait_time': int(wait_time), 'size_mb': build_size_mb, 'sanity': sanity}
                ack('build', params)
                waiting_cleans[sra_id] = time.time()
            else:
                logging.warning(f'[{sra_id}] Building graph failed. Removing {build_path}.')
                subprocess.run(['rm', '-rf', build_path])
                params = {'id': sra_id, 'time': int(time.time() - start_time), 'wait_time': int(wait_time),
                          'size_mb': build_size_mb, 'return_code': return_code}
                nack('build', params)
        else:
            total_reserved_ram_gb += reserved_ram_gb
            used_cores += max(4, round(reserved_ram_gb / 3.75))
    for d in completed_builds:
        del build_processes[d]

    completed_cleans = set()
    for sra_id, (clean_process, start_time, wait_time, reserved_ram_gb) in clean_processes.items():
        return_code = clean_process.poll()
        if return_code is not None:
            completed_cleans.add(sra_id)
            log_file_name = os.path.join(clean_dir(sra_id), 'clean.log')
            logging.info(f'[{sra_id}] Clean finished with output\n {util.tab(open(log_file_name).readlines())}\n\n\n')

            # clean up the original graph; if adding retries, do this only on success
            build_path = build_dir(sra_id)
            logging.info(f'[{sra_id}] Cleaning up {build_path}')
            subprocess.run(['rm', '-rf', build_path])

            cleaned_dir = clean_dir(sra_id)
            cleaned_size_mb = util.dir_size_MB(cleaned_dir)
            if return_code == 0:
                logging.info(f'[{sra_id}] Cleaning graph completed successfully.')

                params = {'id': sra_id, 'time': int(time.time() - start_time), 'wait_time': int(wait_time),
                          'size_mb': cleaned_size_mb}
                ack('clean', params)
                start_transfer(sra_id, cleaned_dir, 'clean')
            else:
                params = {'id': sra_id, 'time': int(time.time() - start_time), 'wait_time': int(wait_time),
                          'size_mb': cleaned_size_mb, 'return_code': return_code}
                nack('clean', params)
                logging.warning(f'[{sra_id}] Cleaning graph failed. Removing {cleaned_dir}')
                subprocess.run(['rm', '-rf', cleaned_dir])
        else:
            total_reserved_ram_gb += reserved_ram_gb
            used_cores += max(4, round(reserved_ram_gb / 3.75))
    for d in completed_cleans:
        del clean_processes[d]

    completed_transfers = set()
    for sra_id, (transfer_process, start_time) in transfer_processes.items():
        return_code = transfer_process.poll()
        if return_code is not None:
            completed_transfers.add(sra_id)
            # clean up the cleaned graph; if adding retries, do this only on success
            clean_path = clean_dir(sra_id)
            cleaned_size_mb = util.dir_size_MB(clean_path)
            logging.info(f'[{sra_id}] Cleaning up {clean_path}')
            subprocess.run(['rm', '-rf', clean_path])

            if return_code == 0:
                logging.info(f'[{sra_id}] Transferring graph completed successfully.')
                params = {'id': sra_id, 'time': int(time.time() - start_time),
                          'total_time': int(time.time() - sra_info[sra_id][0]), 'size_init_mb': sra_info[sra_id][1],
                          'size_final_mb': cleaned_size_mb}
                ack('transfer', params)
            else:
                logging.warning(f'[{sra_id}] Transferring cleaned graph failed.')
                params = {'id': sra_id, 'time': int(time.time() - start_time),
                          'size_mb': cleaned_size_mb}
                nack('transfer', params)

    # for cleaning we allow using all the available RAM
    total_ram_gb = psutil.virtual_memory().total / 1e9
    not_reserved_ram_gb = total_ram_gb - total_reserved_ram_gb
    # TODO: figure out why we have so much free memory when all cores are exhausted
    if used_cores < 2 * CORES and waiting_cleans:
        logging.info(f'Ram reserved {round(total_reserved_ram_gb, 2)}GB, total {round(total_ram_gb, 2)}')
        for sra_id, start_time in waiting_cleans.items():
            # remove the old clean waiting and append the new one after
            build_path = build_dir(sra_id)
            build_size_gb = util.dir_size_MB(build_path) / 1e3
            required_ram_gb = max(build_size_gb * 2, build_size_gb + 1)
            if not_reserved_ram_gb > required_ram_gb:
                logging.info(
                    f'[{sra_id}] Estimated {required_ram_gb}GB needed for cleaning, available {not_reserved_ram_gb} GB')
                kmer_count_unique = sra_info[sra_id][2]
                kmer_coverage = sra_info[sra_id][3]
                kmer_count_singletons = sra_info[sra_id][4]
                fallback = 5 if kmer_coverage > 5 else 2 if kmer_coverage > 2 or kmer_count_unique > 1e6 else 1

                # multiplying singletons by 2 because we compute the canonical graph and KMC doesn't
                start_clean(sra_id, time.time() - start_time, 2 * kmer_count_singletons, fallback, required_ram_gb,
                            not_reserved_ram_gb)
                not_reserved_ram_gb -= required_ram_gb
                del waiting_cleans[sra_id]
                break
            logging.info(f'[{sra_id}] Not enough RAM for cleaning. '
                         f'Have {round(not_reserved_ram_gb, 2)}GB, need {round(required_ram_gb, 2)}GB')

    if used_cores < 2 * CORES and waiting_builds:
        logging.info(f'Ram reserved {round(total_reserved_ram_gb, 2)}GB, total {round(total_ram_gb, 2)}')
        for sra_id, start_time in waiting_builds.items():
            num_kmers = sra_info[sra_id][2]
            # estimate RAM needed for loading graph in memory;
            bytes_per_kmer = 2.6  # 0.6 bytes/kmer (for --small representation), 2 byte/kmer-count
            kmer_count = 2.6 * num_kmers  # 2x canonical+non-canonical +  ~30% for dummy kmers (typically it's 10%)
            required_ram_gb = round(kmer_count * bytes_per_kmer / 1e9 + 0.5, 2)
            if required_ram_gb > total_ram_gb - 2:
                download_path = download_dir(sra_id)
                logging.warning(
                    f'[{sra_id}] Building graph needs too much RAM ({required_ram_gb}GB). Removing {download_path}.')
                subprocess.run(['rm', '-rf', download_path])
                params = {'id': sra_id, 'time': int(time.time() - start_time), 'required_ram_gb': required_ram_gb}
                nack('build', params)
                del waiting_builds[sra_id]
                break
            elif required_ram_gb < not_reserved_ram_gb and not_reserved_ram_gb > 2:
                logging.info(
                    f'[{sra_id}] Estimated {required_ram_gb}GB needed for building, available {not_reserved_ram_gb} GB')
                # how much memory does it take to load all unique kmers into RAM: 8B for the kmer, 2B for the count
                required_ram_all_mem_gb = num_kmers * (8 + 2) * 3.5 / 1e9  # also account for dummy k-mers
                if required_ram_all_mem_gb < 5 and required_ram_all_mem_gb < not_reserved_ram_gb:
                    required_ram_gb = max(required_ram_gb, required_ram_all_mem_gb)
                    start_build(sra_id, time.time() - start_time, math.ceil(required_ram_all_mem_gb), 'vector',
                                required_ram_gb, not_reserved_ram_gb)
                else:
                    buffer_size_gb = max(2, min(round(required_ram_gb * 0.8 - 1), 20))
                    start_build(sra_id, time.time() - start_time, buffer_size_gb, 'vector_disk', required_ram_gb,
                                not_reserved_ram_gb)
                del waiting_builds[sra_id]
                not_reserved_ram_gb -= required_ram_gb  # not that it matters
                break
            else:
                logging.info(
                    f'[{sra_id}] Not enough RAM for building. Have {round(total_ram_gb - total_reserved_ram_gb, 2)}GB '
                    f'need {required_ram_gb}GB')

    for d in completed_transfers:
        del transfer_processes[d]
    return download_processes or build_processes or clean_processes or transfer_processes or not downloads_done
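
The build-RAM heuristic in check_status is easier to follow with numbers plugged in. A worked example with an assumed input size (the 1e9 unique k-mer count is illustrative, not from the source):

num_kmers = 1_000_000_000      # assumed: 1e9 unique counted k-mers
bytes_per_kmer = 2.6           # 0.6 B/k-mer (--small) + 2 B/k-mer-count
kmer_count = 2.6 * num_kmers   # canonical + non-canonical + dummy k-mers
required_ram_gb = round(kmer_count * bytes_per_kmer / 1e9 + 0.5, 2)
print(required_ram_gb)         # 7.26 -> ~7.3GB reserved for this build

# The all-in-memory variant (8B per k-mer plus 2B per count, times 3.5
# for dummy and non-canonical k-mers) would need far more:
required_ram_all_mem_gb = num_kmers * (8 + 2) * 3.5 / 1e9
print(round(required_ram_all_mem_gb, 2))  # 35.0 -> over the 5GB cap,
                                          # so 'vector_disk' is chosen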
Example #8
parser = argparse.ArgumentParser()
parser.add_argument('--dataset',
                    default='nandos')
parser.add_argument('--image_dir', default='data/letters/my')
parser.add_argument('--label_path', default='data/letters/my.txt')
parser.add_argument('--model_dir', default='model/')
parser.add_argument('--verbosity', default=500, type=int)
parser.add_argument('--network', default='efficientdet-d0')
parser.add_argument('--device', default='cuda')
parser.add_argument('--checkpoint', default=None)
parser.add_argument('--prefix', default='letters')
args = parser.parse_args()

if __name__ == '__main__':

    device = args.device

    make_dir_if_needed(args.model_dir)

    calc_loss = total_loss()

    if args.dataset == 'nandos':
        train_dataset = NandosDataset(args.image_dir,
                                      args.label_path,
                                      device=device,
                                      transform=transforms.Compose([
                                          Augmenter(),
                                          MaxSizeResizer(1280),
                                          SquarePad(),
                                          ToTensor(),
                                      ]))
    elif args.dataset == 'letters':
        train_dataset = LetterDataset(args.image_dir,