def get_info_for(pdb_code):
    """Return the parsed (pickled) info for ``pdb_code``, building it if needed.

    The code is normalised with ``lower().strip()``. If ``<code>.pkl`` is not
    listed in ``pdb_paths.PDB_PARSED_SET`` the function first tries to produce
    it:

    * if ``<code>.pdb`` is listed in ``pdb_paths.PDB_FILES_SET`` it calls
      ``loaders.load_pdb_info(pdb_code)``;
    * otherwise it calls ``download(pdb_code, silent=False)`` and, when that
      succeeds, ``loaders.load_pdb_info(pdb_code)``.

    On success both sets are updated with the new file names.

    Returns:
        The object unpickled from ``<pdb_paths.PDB_PARSED>/<code>.pkl``, or
        ``None`` when the download/load step reports failure.
    """
    pdb_suffix = pdb_code.lower().strip()
    pkl_name = pdb_suffix + ".pkl"
    pdb_name = pdb_suffix + ".pdb"

    if pkl_name not in pdb_paths.PDB_PARSED_SET:
        print(f"{pdb_suffix} not in pdb_paths.PDB_PARSED_SET")
        if pdb_name in pdb_paths.PDB_FILES_SET:
            # This branch means the raw .pdb file IS known locally; the
            # message previously claimed the opposite.
            print(f"{pdb_suffix} found in pdb_paths.PDB_FILES_SET")
            logging.info(f"{pdb_suffix} not found in PDB_PARSED_SET, "
                         f"but is in PDB_FILES_SET.")
            get_success = loaders.load_pdb_info(pdb_code)
        else:
            logging.info(f"{pdb_suffix} not found in PDB_PARSED_SET or "
                         f"PDB_FILES_SET.")
            # TODO: the download path has not been tested yet.
            get_success = download(pdb_code, silent=False)
            if get_success:
                get_success = loaders.load_pdb_info(pdb_code)
        if not get_success:
            print(f"get_info_for(pdb_code) failed for {pdb_code}")
            logging.warning(f"Loading of pdb file {pdb_suffix} fails.")
            return None
        # Record the new files so later calls skip the rebuild.
        pdb_paths.PDB_PARSED_SET.add(pkl_name)
        pdb_paths.PDB_FILES_SET.add(pdb_name)

    filepath = os.path.join(pdb_paths.PDB_PARSED, pkl_name)
    # NOTE: pickle.load executes arbitrary code from the file; only safe as
    # long as the parsed directory contains files produced by this project.
    with open(filepath, 'rb') as file:
        output = pickle.load(file)
    return output
# Example #2
0
def _fetch_parsed_from_s3(pkl_name):
    """Try to download ``pkl_name`` from the 'definedproteins' S3 bucket.

    The object is written to ``<paths.PDB_PARSED>/<pkl_name>``. Returns
    ``True`` on success and ``False`` on any failure; on failure the partially
    written file is removed so no empty pickle is left behind.
    """
    output_path = os.path.join(paths.PDB_PARSED, pkl_name)
    print(f"S3: {pkl_name}")
    try:
        # Credentials are resolved by boto3's default provider chain
        # (environment variables, shared credentials file, instance role).
        # Secrets must never be embedded in source code.
        s3 = boto3.client('s3')
        with open(output_path, 'wb') as f:
            s3.download_fileobj('definedproteins', pkl_name, f)
    except Exception:
        print("S3 Fail")
        try:
            os.remove(output_path)
        except OSError:
            # Nothing to clean up (file was never created or already gone).
            pass
        return False
    print("S3 Success")
    return True


def _parse_from_pdb_file(pdb_code):
    """Build the parsed info from the raw .pdb file, downloading it if needed.

    Returns the (truthy/falsy) result of ``loaders.load_pdb_info``, or
    ``False`` when the download fails.
    """
    if pdb_code.lower().strip() + ".pdb" in paths.PDB_FILES_SET:
        return loaders.load_pdb_info(pdb_code)
    if not download(pdb_code, silent=False):
        print(f"download(pdb_code) failed for {pdb_code}")
        return False
    # Re-scan the directory so the freshly downloaded file is registered.
    paths.PDB_FILES_SET = set(os.listdir(paths.PDB_FILES))
    return loaders.load_pdb_info(pdb_code)


def get_info_for(pdb_code, reset=False):
    """Return the parsed (pickled) info for ``pdb_code``.

    Args:
        pdb_code: PDB identifier; normalised with ``lower().strip()``.
        reset: When true, always re-download the structure and re-run
            ``loaders.load_pdb_info`` before reading the pickle.

    When ``reset`` is false and ``<code>.pkl`` is not listed in
    ``paths.PDB_PARSED_SET``, the pickle is fetched from S3 first; if that
    fails it is built locally from the .pdb file (downloading the .pdb file
    when it is not listed in ``paths.PDB_FILES_SET``).

    Returns:
        The object unpickled from ``<paths.PDB_PARSED>/<code>.pkl``, or
        ``None`` when every way of obtaining it fails.
    """
    pdb_suffix = pdb_code.lower().strip() + ".pkl"
    if reset:
        if not download(pdb_code, silent=False):
            print(f"download(pdb_code) failed for {pdb_code}")
            return None
        if not loaders.load_pdb_info(pdb_code):
            print(f"load_pdb_info(pdb_code) failed for {pdb_code}")
            return None
    elif pdb_suffix not in paths.PDB_PARSED_SET:
        get_success = _fetch_parsed_from_s3(pdb_suffix)
        if not get_success:
            # The S3 file handle is closed by now, so the loader can safely
            # write to the same output path.
            get_success = _parse_from_pdb_file(pdb_code)
        if not get_success:
            print(f"get_info_for(pdb_code) failed for {pdb_code}")
            return None
        paths.PDB_PARSED_SET = set(os.listdir(paths.PDB_PARSED))

    filepath = os.path.join(paths.PDB_PARSED, pdb_suffix)
    # NOTE: pickle.load executes arbitrary code from the file. The pickle may
    # come from a remote bucket, so the bucket contents must be trusted.
    with open(filepath, 'rb') as file:
        output = pickle.load(file)
    return output
def get_info_no_cache(pdb_code):
    """Download and parse ``pdb_code`` unconditionally, then return its info.

    Unlike a cached lookup, this never consults the known-file sets: it always
    calls ``download(pdb_code, silent=False)`` followed by
    ``loaders.load_pdb_info(pdb_code)``.

    NOTE: despite the name, the downloaded .pdb file and the generated .pkl
    file are left on disk; removing them afterwards is currently disabled.

    Returns:
        The object unpickled from ``<pdb_paths.PDB_PARSED>/<code>.pkl``, or
        ``None`` (after logging a warning) when download or loading fails.
    """
    pdb_suffix = pdb_code.lower().strip()
    get_success = download(pdb_code, silent=False)
    if get_success:
        get_success = loaders.load_pdb_info(pdb_code)
    if not get_success:
        logging.warning(f"Loading of pdb file {pdb_suffix} fails.")
        return None
    filepath = os.path.join(pdb_paths.PDB_PARSED, pdb_suffix + '.pkl')
    # NOTE: pickle.load executes arbitrary code from the file; only safe for
    # files produced by this project.
    with open(filepath, 'rb') as file:
        return pickle.load(file)
def preload_all():
    """Run ``loaders.load_pdb_info`` for every name in ``PDB_FILES_SET``.

    Each file name is printed, and the part before its first "." is used as
    the PDB code. The loader's return value is ignored.
    """
    for pdb_filename in pdb_paths.PDB_FILES_SET:
        print(pdb_filename)
        code, _, _ = pdb_filename.partition(".")
        loaders.load_pdb_info(code)