def _parse_s3_config(config_file_name, config_format='boto', profile=None): """ Parses a config file for s3 credentials. Can currently parse boto, s3cmd.conf and AWS SDK config formats :param config_file_name: path to the config file :type config_file_name: str :param config_format: config type. One of "boto", "s3cmd" or "aws". Defaults to "boto" :type config_format: str :param profile: profile name in AWS type config file :type profile: str """ Config = configparser.ConfigParser() if Config.read(config_file_name): # pragma: no cover sections = Config.sections() else: raise AirflowException("Couldn't read {0}".format(config_file_name)) # Setting option names depending on file format if config_format is None: config_format = 'boto' conf_format = config_format.lower() if conf_format == 'boto': # pragma: no cover if profile is not None and 'profile ' + profile in sections: cred_section = 'profile ' + profile else: cred_section = 'Credentials' elif conf_format == 'aws' and profile is not None: cred_section = profile else: cred_section = 'default' # Option names if conf_format in ('boto', 'aws'): # pragma: no cover key_id_option = 'aws_access_key_id' secret_key_option = 'aws_secret_access_key' # security_token_option = 'aws_security_token' else: key_id_option = 'access_key' secret_key_option = 'secret_key' # Actual Parsing if cred_section not in sections: raise AirflowException("This config file format is not recognized") else: try: access_key = Config.get(cred_section, key_id_option) secret_key = Config.get(cred_section, secret_key_option) calling_format = None if Config.has_option(cred_section, 'calling_format'): calling_format = Config.get(cred_section, 'calling_format') except: log = LoggingMixin().logger log.warning("Option Error in parsing s3 config file") raise return (access_key, secret_key, calling_format)
def _to_timestamp(cls, col): """ Convert a column of a dataframe to UNIX timestamps if applicable :param col: A Series object representing a column of a dataframe. """ # try and convert the column to datetimes # the column MUST have a four digit year somewhere in the string # there should be a better way to do this, # but just letting pandas try and convert every column without a format # caused it to convert floats as well # For example, a column of integers # between 0 and 10 are turned into timestamps # if the column cannot be converted, # just return the original column untouched try: col = pd.to_datetime(col) except ValueError: log = LoggingMixin().logger log.warning( "Could not convert field to timestamps: %s", col.name ) return col # now convert the newly created datetimes into timestamps # we have to be careful here # because NaT cannot be converted to a timestamp # so we have to return NaN converted = [] for i in col: try: converted.append(i.timestamp()) except ValueError: converted.append(pd.np.NaN) except AttributeError: converted.append(pd.np.NaN) # return a new series that maintains the same index as the original return pd.Series(converted, index=col.index)
def load_login(): log = LoggingMixin().logger auth_backend = 'airflow.default_login' try: if conf.getboolean('webserver', 'AUTHENTICATE'): auth_backend = conf.get('webserver', 'auth_backend') except conf.AirflowConfigException: if conf.getboolean('webserver', 'AUTHENTICATE'): log.warning( "auth_backend not found in webserver config reverting to " "*deprecated* behavior of importing airflow_login") auth_backend = "airflow_login" try: global login login = import_module(auth_backend) except ImportError as err: log.critical( "Cannot import authentication module %s. " "Please correct your authentication backend or disable authentication: %s", auth_backend, err) if conf.getboolean('webserver', 'AUTHENTICATE'): raise AirflowException("Failed to import authentication backend")