def ccopen(source, *args, **kwargs):
    """Guess the identity of a particular log file and return an instance of it.

    Inputs:
        source - a single logfile, a list of logfiles (for a single job),
                 an input stream, or an URL pointing to a log file.
                 File names may be given as strings or as pathlib paths.
        *args, **kwargs - arguments and keyword arguments passed to filetype

    Returns:
      one of ADF, DALTON, GAMESS, GAMESS UK, Gaussian, Jaguar,
      Molpro, MOPAC, NWChem, ORCA, Psi3, Psi/Psi4, QChem, CJSON or None
      (if it cannot figure it out or the file does not exist).
    """
    inputfile = None
    is_stream = False

    # Normalize pathlib paths to plain strings so that the string-based logic
    # below (URL matching, extension lookup, openlogfile) applies to them.
    # The list case must test for a list: a PurePath is never a list of paths.
    if isinstance(source, pathlib.PurePath):
        source = str(source)
    elif isinstance(source, list):
        source = [
            str(item) if isinstance(item, pathlib.PurePath) else item
            for item in source
        ]

    # Check if source is a link or contains links. Retrieve their content.
    # Try to open the logfile(s), using openlogfile, if the source is a string (filename)
    # or list of filenames. If it can be read, assume it is an open file object/stream.
    is_string = isinstance(source, str)
    is_url = bool(is_string and URL_PATTERN.match(source))
    is_listofstrings = isinstance(source, list) and all(isinstance(s, str) for s in source)

    if is_string or is_listofstrings:
        # Process links from list (download contents into temporary location)
        if is_listofstrings:
            filelist = []
            for filename in source:
                if not URL_PATTERN.match(filename):
                    filelist.append(filename)
                    continue
                try:
                    response = urlopen(filename)
                    # The file is closed on leaving the with-block because
                    # Windows won't let us open it a second time otherwise.
                    with NamedTemporaryFile(delete=False) as tfile:
                        tfile.write(response.read())
                    filelist.append(tfile.name)
                    # Delete temporary file when the program finishes
                    atexit.register(os.remove, tfile.name)
                except (ValueError, URLError):
                    # Do not unpack error.args here: URLError usually carries
                    # a single argument, so unpacking into two names would
                    # raise instead of returning None as documented.
                    return None
            source = filelist

        if not is_url:
            try:
                inputfile = logfileparser.openlogfile(source)
            except IOError:
                return None
        else:
            try:
                response = urlopen(source)
                is_stream = True

                # Retrieve filename from URL if possible
                filename = re.findall(r"\w+\.\w+", source.split('/')[-1])
                filename = filename[0] if filename else ""

                inputfile = logfileparser.openlogfile(filename, object=response.read())
            except (ValueError, URLError):
                return None

    elif hasattr(source, "read"):
        inputfile = source
        is_stream = True

    # Streams are tricky since they don't have seek methods or seek won't work
    # by design even if it is present. We solve this now by reading in the
    # entire stream and using a StringIO buffer for parsing. This might be
    # problematic for very large streams. Slow streams might also be an issue if
    # the parsing is not instantaneous, but we'll deal with such edge cases
    # as they arise. Ideally, in the future we'll create a class dedicated to
    # dealing with these issues, supporting both files and streams.
    if is_stream:
        try:
            inputfile.seek(0, 0)
        except (AttributeError, IOError):
            contents = inputfile.read()
            # StringIO only accepts text. Byte streams are decoded here
            # (UTF-8 is an assumption; undecodable bytes are replaced rather
            # than aborting the parse).
            if isinstance(contents, bytes):
                contents = contents.decode("utf-8", errors="replace")
            inputfile = io.StringIO(contents)
            inputfile.seek(0, 0)

    filetype = guess_filetype(inputfile)

    # If the input file isn't a standard compchem log file, try one of
    # the readers registered in readerclasses, selected either explicitly
    # through the cjson keyword or by the file extension of a single filename.
    if not filetype:
        if kwargs.get("cjson"):
            filetype = readerclasses['cjson']
        elif is_string and source and not is_stream:
            # Only a single filename has an extension to look at; a list of
            # filenames cannot be passed to os.path.splitext.
            ext = os.path.splitext(source)[1][1:].lower()
            if ext in readerclasses:
                filetype = readerclasses[ext]

    # Proceed to return an instance of the logfile parser only if the filetype
    # could be guessed. Need to make sure the input file is closed before creating
    # an instance, because parsers will handle opening/closing on their own.
    if not filetype:
        # Close what was opened here; streams handed in by the caller (or
        # built from them) are left alone.
        if inputfile is not None and not is_stream:
            inputfile.close()
        return None

    if is_stream:
        inputfile.seek(0, 0)
        return filetype(inputfile, *args, **kwargs)

    # We're going to close and reopen below anyway, so skipping seek for lists
    # is just to avoid the missing seek method for fileinput.FileInput. In the
    # long run we need to refactor to support for various input types in a more
    # centralized fashion.
    if is_listofstrings:
        if filetype == Turbomole:
            source = sort_turbomole_outputs(source)
    else:
        inputfile.seek(0, 0)
    inputfile.close()
    return filetype(source, *args, **kwargs)
def ccopen(source, *args, **kwargs):
    """Guess the identity of a particular log file and return an instance of it.

    Inputs:
        source - a single logfile, a list of logfiles (for a single job),
                 an input stream, or an URL pointing to a log file.
                 File names may be given as strings or as pathlib paths.
        *args, **kwargs - arguments and keyword arguments passed to filetype

    Returns:
      one of ADF, DALTON, GAMESS, GAMESS UK, Gaussian, Jaguar,
      Molpro, MOPAC, NWChem, ORCA, Psi3, Psi/Psi4, QChem, CJSON or None
      (if it cannot figure it out or the file does not exist).
    """
    # Imported locally so this block does not depend on a module-level import.
    import pathlib

    inputfile = None
    is_stream = False

    # Normalize pathlib paths to plain strings so that the string-based logic
    # below (URL matching, extension lookup, openlogfile) applies to them.
    if isinstance(source, pathlib.PurePath):
        source = str(source)
    elif isinstance(source, list):
        source = [
            str(item) if isinstance(item, pathlib.PurePath) else item
            for item in source
        ]

    # Check if source is a link or contains links. Retrieve their content.
    # Try to open the logfile(s), using openlogfile, if the source is a string (filename)
    # or list of filenames. If it can be read, assume it is an open file object/stream.
    is_string = isinstance(source, str)
    is_url = bool(is_string and URL_PATTERN.match(source))
    is_listofstrings = isinstance(source, list) and all(isinstance(s, str) for s in source)

    if is_string or is_listofstrings:
        # Process links from list (download contents into temporary location)
        if is_listofstrings:
            filelist = []
            for filename in source:
                if not URL_PATTERN.match(filename):
                    filelist.append(filename)
                    continue
                try:
                    response = urlopen(filename)
                    # The file is closed on leaving the with-block because
                    # Windows won't let us open it a second time otherwise.
                    with NamedTemporaryFile(delete=False) as tfile:
                        tfile.write(response.read())
                    filelist.append(tfile.name)
                    # Delete temporary file when the program finishes
                    atexit.register(os.remove, tfile.name)
                except (ValueError, URLError):
                    # Do not unpack error.args here: URLError usually carries
                    # a single argument, so unpacking into two names would
                    # raise instead of returning None as documented.
                    return None
            source = filelist

        if not is_url:
            try:
                inputfile = logfileparser.openlogfile(source)
            except IOError:
                return None
        else:
            try:
                response = urlopen(source)
                is_stream = True

                # Retrieve filename from URL if possible. The pattern is a raw
                # string so that \w and \. are regex escapes, not (invalid)
                # string escapes.
                filename = re.findall(r"\w+\.\w+", source.split('/')[-1])
                filename = filename[0] if filename else ""

                inputfile = logfileparser.openlogfile(filename, object=response.read())
            except (ValueError, URLError):
                return None

    elif hasattr(source, "read"):
        inputfile = source
        is_stream = True

    # Streams are tricky since they don't have seek methods or seek won't work
    # by design even if it is present. We solve this now by reading in the
    # entire stream and using a StringIO buffer for parsing. This might be
    # problematic for very large streams. Slow streams might also be an issue if
    # the parsing is not instantaneous, but we'll deal with such edge cases
    # as they arise. Ideally, in the future we'll create a class dedicated to
    # dealing with these issues, supporting both files and streams.
    if is_stream:
        try:
            inputfile.seek(0, 0)
        except (AttributeError, IOError):
            contents = inputfile.read()
            # StringIO only accepts text. Byte streams are decoded here
            # (UTF-8 is an assumption; undecodable bytes are replaced rather
            # than aborting the parse).
            if isinstance(contents, bytes):
                contents = contents.decode("utf-8", errors="replace")
            inputfile = io.StringIO(contents)
            inputfile.seek(0, 0)

    filetype = guess_filetype(inputfile)

    # If the input file isn't a standard compchem log file, try one of
    # the readers registered in readerclasses, selected either explicitly
    # through the cjson keyword or by the file extension of a single filename.
    if not filetype:
        if kwargs.get("cjson"):
            filetype = readerclasses['cjson']
        elif is_string and source and not is_stream:
            # Only a single filename has an extension to look at; a list of
            # filenames cannot be passed to os.path.splitext.
            ext = os.path.splitext(source)[1][1:].lower()
            if ext in readerclasses:
                filetype = readerclasses[ext]

    # Proceed to return an instance of the logfile parser only if the filetype
    # could be guessed. Need to make sure the input file is closed before creating
    # an instance, because parsers will handle opening/closing on their own.
    if not filetype:
        # Close what was opened here; streams handed in by the caller (or
        # built from them) are left alone.
        if inputfile is not None and not is_stream:
            inputfile.close()
        return None

    if is_stream:
        inputfile.seek(0, 0)
        return filetype(inputfile, *args, **kwargs)

    # We're going to close and reopen below anyway, so skipping seek for lists
    # is just to avoid the missing seek method for fileinput.FileInput. In the
    # long run we need to refactor to support for various input types in a more
    # centralized fashion.
    if is_listofstrings:
        if filetype == Turbomole:
            source = sort_turbomole_outputs(source)
    else:
        inputfile.seek(0, 0)
    inputfile.close()
    return filetype(source, *args, **kwargs)