Example #1
0
def smart_load_dictionary(name=None,
                          file_path=None,
                          url=None,
                          registry_location=cifdic_register_url,
                          save_local=False,
                          store_dir=None):
    from iotbx import cif
    assert [name, file_path, url].count(None) < 3
    cif_dic = None
    if store_dir is None:
        store_dir = libtbx.env.under_dist(module_name='iotbx',
                                          path='cif/dictionaries')
    if name is not None and [file_path, url].count(None) == 2:
        if file_path is None:
            if os.path.isfile(name):
                file_path = name
            else:
                file_path = os.path.join(store_dir, name)
            if not os.path.isfile(file_path):
                gzip_path = file_path + '.gz'
                if os.path.isfile(gzip_path):
                    if save_local:
                        gz = smart_open.for_reading(gzip_path)
                        f = smart_open.for_writing(file_path)
                        shutil.copyfileobj(gz, f)
                        gz.close()
                        f.close()
                    else:
                        file_path = gzip_path
    if file_path is not None and os.path.isfile(file_path):
        file_object = smart_open.for_reading(file_path)
        cif_dic = dictionary(cif.reader(file_object=file_object).model())
        file_object.close()
    else:
        if url is None:
            url = locate_dictionary(name, registry_location=registry_location)
        file_object = urlopen(url)
        if save_local:
            if name is None:
                name = os.path.basename(url)
            f = open(os.path.join(store_dir, name), 'wb')
            shutil.copyfileobj(file_object, f)
            f.close()
            cif_dic = dictionary(
                cif.reader(file_path=os.path.join(store_dir, name)).model())
        else:
            cif_dic = dictionary(cif.reader(file_object=file_object).model())
    assert cif_dic is not None
    return cif_dic
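A minimal usage sketch for the loader above, assuming a working cctbx/iotbx installation; the dictionary name 'cif_core.dic' and the import path are assumptions, not taken from the snippet itself.

# Hypothetical usage of smart_load_dictionary; 'cif_core.dic' is an assumed
# dictionary name and may be fetched from the registry on first use.
from iotbx.cif.validation import smart_load_dictionary  # assumed module path

cif_dic = smart_load_dictionary(name='cif_core.dic', save_local=True)
print(cif_dic)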
Example #2
0
 def __init__(self,
              file_path=None,
              file_object=None,
              input_string=None,
              cif_object=None,
              builder=None,
              raise_if_errors=True,
              strict=True):
     assert [file_path, file_object, input_string].count(None) == 2
     self.file_path = file_path
     if builder is None:
         builder = builders.cif_model_builder(cif_object)
     else:
         assert cif_object is None
     self.builder = builder
     if file_path is not None:
         file_object = smart_open.for_reading(file_path)
     else:
         file_path = "memory"
     if file_object is not None:
         input_string = file_object.read()
     # check input_string for binary, and abort if necessary
     binary_detector = detect_binary_file()
     binary_detector.monitor_initial = min(len(input_string),
                                           binary_detector.monitor_initial)
     if binary_detector.is_binary_file(block=input_string):
         raise CifParserError("Binary file detected, aborting parsing.")
     self.parser = ext.fast_reader(builder, input_string, file_path, strict)
     if raise_if_errors and len(self.parser.lexer_errors()):
         raise CifParserError(self.parser.lexer_errors()[0])
     if raise_if_errors and len(self.parser.parser_errors()):
         raise CifParserError(self.parser.parser_errors()[0])
Example #3
0
 def __init__(self,
              file_path=None,
              file_object=None,
              input_string=None,
              cif_object=None,
              builder=None,
              raise_if_errors=True,
              strict=True):
   assert [file_path, file_object, input_string].count(None) == 2
   self.file_path = file_path
   if builder is None:
     builder = builders.cif_model_builder(cif_object)
   else: assert cif_object is None
   self.builder = builder
   if file_path is not None:
     file_object = smart_open.for_reading(file_path)
   else:
     file_path = "memory"
   if file_object is not None:
     input_string = file_object.read()
   # check input_string for binary, and abort if necessary
   binary_detector = detect_binary_file()
   binary_detector.monitor_initial = min(
     len(input_string), binary_detector.monitor_initial)
   if binary_detector.is_binary_file(block=input_string):
     raise CifParserError("Binary file detected, aborting parsing.")
   self.parser = ext.fast_reader(builder, input_string, file_path, strict)
   if raise_if_errors and len(self.parser.lexer_errors()):
     raise CifParserError(self.parser.lexer_errors()[0])
   if raise_if_errors and len(self.parser.parser_errors()):
     raise CifParserError(self.parser.parser_errors()[0])
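Exactly one of file_path, file_object, or input_string must be supplied to this constructor (the assert requires two of the three to be None). Below is a small sketch of calling it through iotbx.cif.reader, which is how the other examples use it; the CIF string is made up.

# Sketch: constructing the reader from an in-memory CIF string; a file could
# equally be passed via file_path= (opened through smart_open, as above).
from iotbx import cif

r = cif.reader(input_string="data_example\n_cell_length_a 10.0\n")
model = r.model()   # accessor used in Example #1 above
print(model)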
Example #4
0
 def __init__(self, file_object=None, file_name=None):
   assert [file_object, file_name].count(None) == 1
   if (file_object is None):
     from libtbx import smart_open
     file_object = smart_open.for_reading(file_name=file_name)
   from cctbx.array_family import flex
   super(reader, self).__init__(lines=flex.split_lines(file_object.read()))
Example #5
0
 def __init__(self, file_object=None, file_name=None):
   assert [file_object, file_name].count(None) == 1
   if (file_object is None):
     from libtbx import smart_open
     file_object = smart_open.for_reading(file_name=file_name)
   from cctbx.array_family import flex
   super(reader, self).__init__(lines=flex.split_lines(file_object.read()))
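Examples #4 and #5 rely on the assert [a, b].count(None) == 1 idiom to require exactly one of two mutually exclusive arguments. A standalone illustration of that idiom, with made-up names:

# Standalone sketch of the "exactly one argument" idiom; names are hypothetical.
def load_lines(file_object=None, file_name=None):
    assert [file_object, file_name].count(None) == 1  # exactly one must be given
    if file_object is None:
        file_object = open(file_name)
    return file_object.read().splitlines()

print(load_lines(file_name=__file__)[:1])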
Example #6
0
def smart_load_dictionary(name=None, file_path=None, url=None,
                          registry_location=cifdic_register_url,
                          save_local=False, store_dir=None):
  from iotbx import cif
  assert [name, file_path, url].count(None) < 3
  cif_dic = None
  if store_dir is None:
    store_dir = libtbx.env.under_dist(
      module_name='iotbx', path='cif/dictionaries')
  if name is not None and [file_path, url].count(None) == 2:
    if file_path is None:
      if os.path.isfile(name):
        file_path = name
      else:
        file_path = os.path.join(store_dir, name)
      if not os.path.isfile(file_path):
        gzip_path = file_path + '.gz'
        if os.path.isfile(gzip_path):
          if save_local:
            gz = smart_open.for_reading(gzip_path)
            f = smart_open.for_writing(file_path)
            shutil.copyfileobj(gz, f)
            gz.close()
            f.close()
          else:
            file_path = gzip_path
  if file_path is not None and os.path.isfile(file_path):
    file_object = smart_open.for_reading(file_path)
    cif_dic = dictionary(cif.reader(file_object=file_object).model())
    file_object.close()
  else:
    if url is None:
      url = locate_dictionary(name, registry_location=registry_location)
    file_object = urlopen(url)
    if save_local:
      if name is None:
        name = os.path.basename(url)
      f = open(os.path.join(store_dir, name), 'wb')
      shutil.copyfileobj(file_object, f)
      f.close()
      cif_dic = dictionary(cif.reader(
        file_path=os.path.join(store_dir, name)).model())
    else:
      cif_dic = dictionary(cif.reader(
        file_object=file_object).model())
  assert cif_dic is not None
  return cif_dic
Example #7
0
    def from_msgpack_file(filename):
        '''
        Read the reflection table from file in msgpack format
        '''
        from libtbx import smart_open
        with smart_open.for_reading(filename, 'rb') as infile:
            return reflection_table.from_msgpack(infile.read())
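A hedged usage sketch for the msgpack reader above, assuming it is exposed as a staticmethod of dials' reflection_table (the class name appears in the body; the import path and file name are assumptions).

# Hypothetical call; requires a dials installation and an existing .refl file.
from dials.array_family import flex  # assumed import path

table = flex.reflection_table.from_msgpack_file("strong.refl")
print(len(table), "reflections")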
Example #8
0
 def __init__(self):
     """
 Loads pickle with data. Path is temporary in current work dir.
 Should be centralized somewhere else upon going to production.
 """
     db_dict = {}
     pdb_info_file = libtbx.env.find_in_repositories(
         relative_path="cctbx_project/iotbx/bioinformatics/pdb_info.csv.gz",
         test=os.path.isfile)
     csv_file = smart_open.for_reading(file_name=pdb_info_file)
     csv_reader = csv.reader(csv_file, delimiter=";")
     for row in csv_reader:
         db_dict[row[0]] = (row[1], row[2], row[3], row[4], row[5])
     self.db_dict = db_dict
Example #9
0
def extract_remark_2_and_3_records(file_name, file_lines=None):
    result = []
    if (file_lines is None):
        file_lines = smart_open.for_reading(
            file_name=file_name).read().splitlines()
    else:
        assert (file_name is None)
    for rec in file_lines:
        if (rec.startswith("REMARK   3 ") or rec.startswith("REMARK   2 ")):
            start = True
            result.append(rec)
        else:
            if (rec.startswith("ATOM ") or rec.startswith("HETATM ")):
                break
    return result
Example #10
0
def extract_remark_2_and_3_records(file_name, file_lines=None):
  result = []
  if (file_lines is None) :
    file_lines = smart_open.for_reading(
      file_name = file_name).read().splitlines()
  else :
    assert (file_name is None)
  for rec in file_lines:
    if(rec.startswith("REMARK   3 ") or rec.startswith("REMARK   2 ")):
      start = True
      result.append(rec)
    else:
      if(rec.startswith("ATOM ") or rec.startswith("HETATM ")):
        break
  return result
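A small sketch of calling extract_remark_2_and_3_records with pre-read lines instead of a file name; per the assert above, file_name must then be None. The PDB records below are made-up example lines.

# Hypothetical input lines; only the REMARK 2/3 records should be returned.
pdb_lines = [
    "REMARK   2 RESOLUTION.    1.90 ANGSTROMS.",
    "REMARK   3   PROGRAM     : REFMAC",
    "ATOM      1  N   ALA A   1      11.104   6.134  -6.504  1.00  0.00           N",
]
print(extract_remark_2_and_3_records(file_name=None, file_lines=pdb_lines))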
Example #11
0
    def from_pickle(filename):
        '''
        Read the reflection table from pickle file.

        :param filename: The pickle filename
        :return: The reflection table
        '''
        import six.moves.cPickle as pickle
        from libtbx import smart_open

        with smart_open.for_reading(filename, 'rb') as infile:
            result = pickle.load(infile)
            assert (isinstance(result, reflection_table))
            return result
Example #12
0
  def from_pickle(filename):
    '''
    Read the reflection table from pickle file.

    :param filename: The pickle filename
    :return: The reflection table

    '''
    import cPickle as pickle
    from libtbx import smart_open

    with smart_open.for_reading(filename, 'rb') as infile:
      result = pickle.load(infile)
      assert(isinstance(result, reflection_table))
      return result
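Examples #7, #11 and #12 all open their input through libtbx.smart_open.for_reading, which the snippets use both with a keyword file_name and as a context manager with a 'rb' mode, and which transparently handles .gz files (see the gzip handling in Examples #1 and #16). A small sketch of that pattern with hypothetical file names:

# Sketch: reading plain or gzipped files the same way; file names are made up.
from libtbx import smart_open

for name in ("reflections.pickle", "reflections.pickle.gz"):
    try:
        with smart_open.for_reading(name, 'rb') as infile:
            print(name, len(infile.read()), "bytes")
    except IOError:
        print(name, "not found")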
Example #13
0
def run(args):
  for f in args:
    try:
      file_object = smart_open.for_reading(file_name=f)
      miller_arrays = iotbx.cif.reader(file_object=file_object).as_miller_arrays()
    except KeyboardInterrupt:
      raise
    except Exception, e:
      print "Error extracting miller arrays from file: %s:" % (
        show_string(f))
      print " ", str(e)
      continue
    for miller_array in miller_arrays:
      miller_array.show_comprehensive_summary()
      print
    r, _ = op.splitext(op.basename(f))
    easy_pickle.dump(file_name=r+'_miller_arrays.pickle', obj=miller_arrays)
Example #14
0
def run():
  data_dir = '/net/cci/youval/Work/work/MTRIX/Data'
  #data_dir = r'c:\Phenix\Dev\Work\work\MTRIX\Data'
  os.chdir(data_dir)
  file_to_year_dict = {}

  files_with_good_MTRIX = set(pickle.load(open(os.path.join(data_dir,'files_with_good_MTRIX'),'r')))
  good_MTRIX_pdb_files = pickle.load(open(os.path.join(data_dir,'dict_good_MTRIX_pdb_files'),'r'))

  # find the file in LBL pdb mirror folder
  for fn in files_with_good_MTRIX:
    file_name_with_path = good_MTRIX_pdb_files[fn]
    file_lines = smart_open.for_reading(
            file_name = file_name_with_path).read().splitlines()
    year = get_year(file_lines)
    file_to_year_dict[fn] = year

  print len(file_to_year_dict)
Example #15
0
def extract_from(file_name=None, file=None, monitor_initial=None):
    assert [file_name, file].count(None) == 1
    if (file is None):
        file = smart_open.for_reading(file_name=file_name)
    detect_binary = detect_binary_file(monitor_initial=monitor_initial)
    line_number = 0
    for line in file:
        line_number += 1
        if (detect_binary is not None):
            is_binary = detect_binary.is_binary_file(block=line)
            if (is_binary is not None):
                if (is_binary): break
                detect_binary = None
        if (line.startswith("CRYST1")):
            return cryst1_interpretation.crystal_symmetry(cryst1_record=line)
        crystal_symmetry = cns_pdb_remarks.extract_symmetry(pdb_record=line)
        if (crystal_symmetry is not None):
            return crystal_symmetry
    raise RuntimeError("No CRYST1 record.")
Example #16
0
def fetch(id,
          data_type="pdb",
          format="pdb",
          mirror="rcsb",
          log=None,
          force_download=False,
          local_cache=None):
    """
  Locate and open a data file for the specified PDB ID and format, either in a
  local mirror or online.

  :param id: 4-character PDB ID (e.g. '1hbb')
  :param data_type: type of content to download: pdb, xray, or fasta
  :param format: format of data: cif, pdb, or xml
  :param mirror: remote site to use, either rcsb, pdbe, pdbj or pdb-redo

  :returns: a filehandle-like object (with read() method)
  """
    assert data_type in ["pdb", "xray", "fasta", "seq"]
    assert format in ["cif", "pdb", "xml"]
    assert mirror in ["rcsb", "pdbe", "pdbj", "pdb-redo"]
    validate_pdb_id(id)
    if (log is None): log = null_out()

    id = id.lower()
    if (not force_download):
        if (local_cache is not None) and (data_type == "pdb"):
            from iotbx.file_reader import guess_file_type
            if (local_cache is Auto):
                local_cache = os.getcwd()
            cache_files = os.listdir(local_cache)
            for file_name in cache_files:
                if (len(file_name) > 4):
                    file_id = re.sub("^pdb", "", file_name)[0:4]
                    if (file_id.lower() == id):
                        if (guess_file_type(file_name) == "pdb"):
                            file_name = os.path.join(local_cache, file_name)
                            print >> log, "Reading from cache directory:"
                            print >> log, "  " + file_name
                            f = smart_open.for_reading(file_name)
                            return f
        # try local mirror for PDB and X-ray data files first, if it exists
        if (data_type == "pdb") and (format == "pdb") and \
               ("PDB_MIRROR_PDB" in os.environ) :
            subdir = os.path.join(os.environ["PDB_MIRROR_PDB"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "pdb%s.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
        if (data_type == "pdb") and (format == "cif") and \
               ("PDB_MIRROR_MMCIF" in os.environ) :
            subdir = os.path.join(os.environ["PDB_MIRROR_MMCIF"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "%s.cif.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
        if ((data_type == "xray")
                and ("PDB_MIRROR_STRUCTURE_FACTORS" in os.environ)):
            sf_dir = os.environ["PDB_MIRROR_STRUCTURE_FACTORS"]
            subdir = os.path.join(sf_dir, id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "r%ssf.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
    # No mirror found (or out of date), default to HTTP download
    url = None
    compressed = False
    if (mirror == "rcsb"):
        url_base = 'https://files.rcsb.org/download/'
        pdb_ext = ".pdb"
        sf_prefix = ""
        sf_ext = "-sf.cif"
    elif (mirror == "pdbe"):
        url_base = "https://www.ebi.ac.uk/pdbe-srv/view/files/"
        pdb_ext = ".ent"
        sf_prefix = "r"
        sf_ext = "sf.ent"
    elif (mirror == "pdbj"):
        url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
        if (data_type == "pdb"):
            compressed = True
            if (format == "pdb"):
                url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
            elif (format == "cif"):
                url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
        elif (data_type == "xray"):
            compressed = True
            url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3],
                                                                    id)
        elif (data_type in ["fasta", "seq"]):
            url = "https://pdbj.org/rest/downloadPDBfile?format=fasta&id=%s" % id
        if (url is None) and (data_type != "fasta"):
            raise Sorry(
                "Can't determine PDBj download URL for this data/format " +
                "combination.")
    elif mirror == "pdb-redo":
        url_base = "https://pdb-redo.eu/db/"
        pdb_ext = "_final.pdb"
        cif_ext = "_final.cif"
        sf_prefix = ""
        sf_ext = "_final.mtz"
        if (data_type == 'pdb'):
            if (format == 'pdb'):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=pdb_ext)
            elif (format == 'cif'):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=cif_ext)
        elif (data_type == 'xray'):
            url = url_base + "{id}/{id}{format}".format(id=id, format=sf_ext)
    if (data_type in ["fasta", "seq"]):
        # XXX the RCSB doesn't appear to have a simple URL for FASTA files
        if (url is None):  # TODO PDBe equivalent doesn't exist?
            url = "https://www.rcsb.org/pdb/download/downloadFastaFiles.do?structureIdList=%s&compressionType=uncompressed" % id
        try:
            data = libtbx.utils.urlopen(url)
        except urllib2.HTTPError, e:
            if e.getcode() == 404:
                raise RuntimeError("Couldn't download sequence for %s." % id)
            else:
                raise
Example #17
0
def fetch (id, data_type="pdb", format="pdb", mirror="rcsb", log=None,
    force_download=False,
    local_cache=None) :
  """
  Locate and open a data file for the specified PDB ID and format, either in a
  local mirror or online.

  :param id: 4-character PDB ID (e.g. '1hbb')
  :param data_type: type of content to download: pdb, xray, or fasta
  :param format: format of data: cif, pdb, or xml
  :param mirror: remote site to use, either rcsb or pdbe

  :returns: a filehandle-like object (with read() method)
  """
  assert data_type in ["pdb", "xray", "fasta", "seq"]
  assert format in ["cif", "pdb", "xml"]
  assert mirror in ["rcsb", "pdbe", "pdbj"]
  validate_pdb_id(id)
  if (log is None) : log = null_out()

  id = id.lower()
  if (not force_download) :
    if (local_cache is not None) and (data_type == "pdb") :
      from iotbx.file_reader import guess_file_type
      if (local_cache is Auto) :
        local_cache = os.getcwd()
      cache_files = os.listdir(local_cache)
      for file_name in cache_files :
        if (len(file_name) > 4) :
          file_id = re.sub("^pdb", "", file_name)[0:4]
          if (file_id.lower() == id) :
            if (guess_file_type(file_name) == "pdb") :
              file_name = os.path.join(local_cache, file_name)
              print >> log, "Reading from cache directory:"
              print >> log, "  " + file_name
              f = smart_open.for_reading(file_name)
              return f
    # try local mirror for PDB and X-ray data files first, if it exists
    if (data_type == "pdb") and ("PDB_MIRROR_PDB" in os.environ) :
      subdir = os.path.join(os.environ["PDB_MIRROR_PDB"], id[1:3])
      if (os.path.isdir(subdir)) :
        file_name = os.path.join(subdir, "pdb%s.ent.gz" % id)
        if (os.path.isfile(file_name)) :
          print >> log, "Reading from local mirror:"
          print >> log, "  " + file_name
          f = smart_open.for_reading(file_name)
          return f
    if ((data_type == "xray") and
        ("PDB_MIRROR_STRUCTURE_FACTORS" in os.environ)) :
      sf_dir = os.environ["PDB_MIRROR_STRUCTURE_FACTORS"]
      subdir = os.path.join(sf_dir, id[1:3])
      if (os.path.isdir(subdir)) :
        file_name = os.path.join(subdir, "r%ssf.ent.gz" % id)
        if (os.path.isfile(file_name)) :
          print >> log, "Reading from local mirror:"
          print >> log, "  " + file_name
          f = smart_open.for_reading(file_name)
          return f
  # No mirror found (or out of date), default to HTTP download
  url = None
  compressed = False
  if (mirror == "rcsb") :
    url_base = "http://www.rcsb.org/pdb/files/"
    pdb_ext = ".pdb"
    sf_prefix = ""
    sf_ext = "-sf.cif"
  elif (mirror == "pdbe") :
    url_base = "http://www.ebi.ac.uk/pdbe-srv/view/files/"
    pdb_ext = ".ent"
    sf_prefix = "r"
    sf_ext = "sf.ent"
  elif (mirror == "pdbj") :
    url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
    if (data_type == "pdb") :
      compressed = True
      if (format == "pdb") :
        url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
      elif (format == "cif") :
        url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
    elif (data_type == "xray") :
      compressed = True
      url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3], id)
    elif (data_type in ["fasta", "seq"]) :
      url = "http://pdbj.org/app//downloadFasta4PDBID?pdbid=%s" % id
    if (url is None) and (data_type != "fasta") :
      raise Sorry("Can't determine PDBj download URL for this data/format "+
        "combination.")
  if (data_type in ["fasta", "seq"]) :
    # XXX the RCSB doesn't appear to have a simple URL for FASTA files
    if (url is None) : # TODO PDBe equivalent doesn't exist?
      url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=FASTA&compression=NO&structureId=%s" % id
    try :
      data = libtbx.utils.urlopen(url)
    except urllib2.HTTPError, e :
      if e.getcode() == 404 :
        raise RuntimeError("Couldn't download sequence for %s." % id)
      else :
        raise
Example #18
0
def fetch(id,
          data_type="pdb",
          format="pdb",
          mirror="rcsb",
          log=None,
          force_download=False,
          local_cache=None):
    """
  Locate and open a data file for the specified PDB ID and format, either in a
  local mirror or online.

  :param id: 4-character PDB ID (e.g. '1hbb')
  :param data_type: type of content to download: pdb, xray, or fasta
  :param format: format of data: cif, pdb, or xml (or cif_or_pdb)
  :param mirror: remote site to use, either rcsb, pdbe, pdbj or pdb-redo

  :returns: a filehandle-like object (with read() method)
  """
    assert data_type in ["pdb", "xray", "fasta", "seq"]
    assert format in ["cif", "pdb", "xml", "cif_or_pdb"]
    assert mirror in ["rcsb", "pdbe", "pdbj", "pdb-redo"]
    validate_pdb_id(id)
    if (log is None): log = null_out()

    id = id.lower()
    if (not force_download):
        if (local_cache is not None) and (data_type == "pdb"):
            from iotbx.file_reader import guess_file_type
            if (local_cache is Auto):
                local_cache = os.getcwd()
            cache_files = os.listdir(local_cache)
            for file_name in cache_files:
                if (len(file_name) > 4):
                    file_id = re.sub("^pdb", "", file_name)[0:4]
                    if (file_id.lower() == id):
                        if (guess_file_type(file_name) == "pdb"):
                            file_name = os.path.join(local_cache, file_name)
                            print("Reading from cache directory:", file=log)
                            print("  " + file_name, file=log)
                            f = smart_open.for_reading(file_name)
                            return f
        # try local mirror for PDB and X-ray data files first, if it exists
        if (data_type == "pdb") and (format in ["pdb", "cif_or_pdb"]) and \
               ("PDB_MIRROR_PDB" in os.environ):
            subdir = os.path.join(os.environ["PDB_MIRROR_PDB"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "pdb%s.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print("Reading from local mirror:", file=log)
                    print("  " + file_name, file=log)
                    f = smart_open.for_reading(file_name)
                    return f
        if (data_type == "pdb") and (format in ["cif", "cif_or_pdb"]) and \
               ("PDB_MIRROR_MMCIF" in os.environ):
            subdir = os.path.join(os.environ["PDB_MIRROR_MMCIF"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "%s.cif.gz" % id)
                if (os.path.isfile(file_name)):
                    print("Reading from local mirror:", file=log)
                    print("  " + file_name, file=log)
                    f = smart_open.for_reading(file_name)
                    return f
        if ((data_type == "xray")
                and ("PDB_MIRROR_STRUCTURE_FACTORS" in os.environ)):
            sf_dir = os.environ["PDB_MIRROR_STRUCTURE_FACTORS"]
            subdir = os.path.join(sf_dir, id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "r%ssf.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print("Reading from local mirror:", file=log)
                    print("  " + file_name, file=log)
                    f = smart_open.for_reading(file_name)
                    return f
    # No mirror found (or out of date), default to HTTP download
    url = None
    compressed = False
    if (mirror == "rcsb"):
        url_base = 'https://files.rcsb.org/download/'
        pdb_ext = ".pdb"
        sf_prefix = ""
        sf_ext = "-sf.cif"
    elif (mirror == "pdbe"):
        url_base = "https://www.ebi.ac.uk/pdbe-srv/view/files/"
        pdb_ext = ".ent"
        sf_prefix = "r"
        sf_ext = "sf.ent"
    elif (mirror == "pdbj"):
        url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
        if (data_type == "pdb"):
            compressed = True
            if (format == "pdb"):
                url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
            elif (format in ["cif", "cif_or_pdb"]):
                url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
        elif (data_type == "xray"):
            compressed = True
            url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3],
                                                                    id)
        elif (data_type in ["fasta", "seq"]):
            url = "https://pdbj.org/rest/downloadPDBfile?format=fasta&id=%s" % id
        if (url is None) and (data_type != "fasta"):
            raise Sorry(
                "Can't determine PDBj download URL for this data/format " +
                "combination.")
    elif mirror == "pdb-redo":
        url_base = "https://pdb-redo.eu/db/"
        pdb_ext = "_final.pdb"
        cif_ext = "_final.cif"
        sf_prefix = ""
        sf_ext = "_final.mtz"
        if (data_type == 'pdb'):
            if (format == 'pdb'):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=pdb_ext)
            elif (format in ['cif', 'cif_or_pdb']):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=cif_ext)
        elif (data_type == 'xray'):
            url = url_base + "{id}/{id}{format}".format(id=id, format=sf_ext)
    if (data_type in ["fasta", "seq"]):
        if (url is None):  # TODO PDBe equivalent doesn't exist?
            # Seems that this url should be working:
            url = "https://www.rcsb.org/fasta/entry/%s" % id
        try:
            data = libtbx.utils.urlopen(url)
        except HTTPError as e:
            if e.getcode() == 404:
                raise RuntimeError("Couldn't download sequence for %s." % id)
            else:
                raise
    elif data_type == "xray":
        if (url is None):
            url = url_base + sf_prefix + id + sf_ext
        try:
            data = libtbx.utils.urlopen(url)
        except HTTPError as e:
            if e.getcode() == 404:
                raise RuntimeError(
                    "Couldn't download structure factors for %s." % id)
            else:
                raise
    else:
        if (url is None):
            if format == "pdb":
                url = url_base + id + pdb_ext
            elif format == "cif_or_pdb":
                url = url_base + id + "." + "cif"
            else:
                url = url_base + id + "." + format
        try:
            data = libtbx.utils.urlopen(url)
        except HTTPError as e:
            if e.getcode() == 404:
                raise RuntimeError("Couldn't download model for %s." % id)
            else:
                raise
    if (compressed):
        try:
            import gzip
        except ImportError:
            raise Sorry(
                "gzip module not available - please use an uncompressed " +
                "source of PDB data.")
        else:
            # XXX due to a bug in urllib2, we can't pass the supposedly file-like
            # object directly, so we read the data into a StringIO object instead
            return gzip.GzipFile(fileobj=StringIO(data.read()))
    return data
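A hedged end-to-end sketch of the fetch function above, downloading the entry used in the docstring example and saving it to disk; this needs network access, and the output file name is arbitrary.

# Hypothetical usage; fetch returns a filehandle-like object with read().
data = fetch(id="1hbb", data_type="pdb", format="pdb", mirror="rcsb")
with open("1hbb.pdb", "wb") as out:   # bytes from urlopen / GzipFile
    out.write(data.read())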