def get_contenttype_and_encoding(filename):
    """Return the content type and text encoding reported for *filename*.

    The file name is handed to the ETL Tika enrichment plugin via its
    ``process`` call, and the result is read from the data mapping that
    call returns.

    Args:
        filename: Value passed to the plugin under the ``'filename'``
            parameter key.

    Returns:
        A ``(contenttype, encoding)`` tuple. ``contenttype`` is the
        ``'content_type_ss'`` entry of the returned data; ``encoding`` is
        its ``'encoding_s'`` entry, or ``'utf-8'`` when that key is absent.
    """
    # Use Tika and the data enrichment/data analysis functions from ETL.
    extractor = enhance_extract_text_tika_server()

    # The returned parameters are not needed here; only the data is read.
    _, data = extractor.process(parameters={'filename': filename}, data={})

    contenttype = data['content_type_ss']

    # Use the detected charset (reported for plain text files) so the text
    # can be extracted with the right encoding; otherwise assume UTF-8.
    encoding = data['encoding_s'] if 'encoding_s' in data else 'utf-8'

    return contenttype, encoding
def get_contenttype_and_encoding(filename):
    """Look up the content type and charset of a file through Tika.

    Runs the ETL Tika text-extraction enrichment on ``filename`` and picks
    the two fields of interest out of the resulting data.

    Args:
        filename: Stored under ``'filename'`` in the parameters given to
            the plugin's ``process`` call.

    Returns:
        Tuple ``(contenttype, encoding)``: the ``'content_type_ss'`` value
        from the plugin's data, followed by the ``'encoding_s'`` value if
        present, else the default ``'utf-8'``.
    """
    # Tika plus the data enrichment/data analysis functions from ETL.
    tika_plugin = enhance_extract_text_tika_server()

    request = {}
    request['filename'] = filename
    result = tika_plugin.process(parameters=request, data={})
    # process() yields a (parameters, data) pair; only the data is used.
    _unused_parameters, analysis = result

    contenttype = analysis['content_type_ss']

    # No charset reported: fall back to UTF-8.
    if 'encoding_s' not in analysis:
        return contenttype, 'utf-8'

    # Charset reported (plain text file): extract with that charset.
    return contenttype, analysis['encoding_s']