def init_samples():
    """Load the sample-data manifest and point every sample at a local file.

    Reads ``sample-data.json`` from the config directory into the module-level
    ``samples`` list, then sets ``sample['path']`` on each entry:

    * in dev mode, ``sample['source']`` is joined onto the base directory;
    * otherwise the file is fetched from the ``SAMPLE_DATA_SERVER`` URL prefix
      and written to a named temporary file that is left on disk
      (``delete=False``) so it can be read later.

    Raises:
        requests.HTTPError: if the sample server answers with an error status,
            so an error page is never cached as if it were sample data.
    """
    global samples
    samples_config_file_path = os.path.join(databasic.get_config_dir(), 'sample-data.json')
    # Use a context manager so the manifest handle is closed deterministically.
    with open(samples_config_file_path) as config_file:
        samples = json.load(config_file)

    if databasic.app.config.get(databasic.ENV_APP_MODE) == databasic.APP_MODE_DEV:
        # change the paths to absolute ones
        base_dir = databasic.get_base_dir()
        for sample in samples:
            sample['path'] = os.path.join(base_dir, sample['source'])
        logger.info("Updated sample data with base dir: %s", base_dir)
    else:
        # copy from server to local temp dir and change to abs paths (to temp dir files)
        url_base = databasic.app.config.get('SAMPLE_DATA_SERVER')
        for sample in samples:
            url = url_base + sample['source']
            logger.info("Loading sample data file: %s", url)
            # NOTE(review): no timeout is set, so a stalled server blocks startup - confirm intended.
            response = requests.get(url)
            # Fail loudly on 4xx/5xx instead of saving the error body as sample data.
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
                f.write(response.text)
            sample['path'] = f.name
        logger.info("Downloaded sample data and saved to tempdir")

    for sample in samples:
        file_size = os.stat(sample['path']).st_size
        logger.debug("  Cached %d bytes of %s to %s", file_size, sample['source'], sample['path'])
def init_samples():
    """Load the sample-data manifest and point every sample at a local file.

    Reads ``sample-data.json`` from the config directory into the module-level
    ``samples`` list, then sets ``sample['path']`` on each entry:

    * in dev mode, ``sample['source']`` is joined onto the base directory;
    * otherwise the file is fetched with ``urlopen`` from the
      ``SAMPLE_DATA_SERVER`` URL prefix and written to a named temporary file
      that is left on disk (``delete=False``) so it can be read later.
    """
    global samples
    samples_config_file_path = os.path.join(databasic.get_config_dir(), 'sample-data.json')
    # Use a context manager so the manifest handle is closed deterministically.
    with open(samples_config_file_path) as config_file:
        samples = json.load(config_file)

    if databasic.app.config.get(databasic.ENV_APP_MODE) == databasic.APP_MODE_DEV:
        # change the paths to absolute ones
        base_dir = databasic.get_base_dir()
        for sample in samples:
            sample['path'] = os.path.join(base_dir, sample['source'])
        logger.info("Updated sample data with base dir: %s", base_dir)
    else:
        # copy from server to local temp dir and change to abs paths (to temp dir files)
        url_base = databasic.app.config.get('SAMPLE_DATA_SERVER')
        for sample in samples:
            url = url_base + sample['source']
            logger.info("Loading sample data file: %s", url)
            # try/finally (rather than `with`) closes the response on every
            # Python version, including ones where it is not a context manager.
            response = urlopen(url)
            try:
                payload = response.read()
            finally:
                response.close()
            with tempfile.NamedTemporaryFile(delete=False) as f:
                f.write(payload)
            sample['path'] = f.name
        logger.info("Downloaded sample data and saved to tempdir")

    for sample in samples:
        file_size = os.stat(sample['path']).st_size
        logger.debug("  Cached %d bytes of %s to %s", file_size, sample['source'], sample['path'])
def _custom_stopwords_list(language, force=True):
    """
    We have some extra stopwords that we want to use in some of our languages
    :param language: NLTK-compatible name of language
    :param force: when true (the default) the stopword file is re-read even if a
                  list for this language is already cached; pass False to reuse the cache
    :return: a list of stopwords we added for that language, [] if none to add
    """
    if force or (language not in language2stopwords):
        path_to_file = os.path.join(get_base_dir(), 'databasic', 'logic', 'stopwords', language)
        try:
            # `with` guarantees the handle is closed even if reading fails part-way.
            with open(path_to_file, 'r') as f:
                stripped_lines = (line.strip() for line in f)
                # keep one entry per non-blank line
                custom_stopwords = [word for word in stripped_lines if word]
        except OSError:
            # a missing/unreadable file just means no extra stopwords for this language
            custom_stopwords = []
        language2stopwords[language] = custom_stopwords
    # speed things up by caching the stopword lists in memory, so it isn't file I/O bound
    return language2stopwords[language]
def download_activity_guide():
    """Send the WTFcsv activity guide PDF for the current language.

    The file is looked up under
    ``<base dir>/databasic/static/files/activity-guides/<g.current_lang>``.

    :return: the result of ``send_from_directory`` for that file
    """
    filename = "WTFcsv Activity Guide.pdf"
    # NOTE(review): g.current_lang is joined into the directory as-is - confirm
    # it is always restricted to known language codes.
    dir_path = os.path.join(get_base_dir(), 'databasic', 'static', 'files',
                            'activity-guides', g.current_lang)
    logger.debug("download activity guide from %s/%s", dir_path, filename)
    # Positional arguments on purpose (assuming this is Flask's helper): Flask
    # renamed the `filename` keyword to `path` and later removed the old name,
    # so the positional form works on both old and new releases.
    return send_from_directory(dir_path, filename)
def download_user_template():
    """Send the ``ctd-template.csv`` user template for the current language.

    The file is looked up under
    ``<base dir>/databasic/static/files/user-templates/<g.current_lang>``.

    :return: the result of ``send_from_directory`` for that file
    """
    filename = "ctd-template.csv"
    # NOTE(review): g.current_lang is joined into the directory as-is - confirm
    # it is always restricted to known language codes.
    dir_path = os.path.join(get_base_dir(), 'databasic', 'static', 'files',
                            'user-templates', g.current_lang)
    logger.debug("download user template from %s/%s", dir_path, filename)
    # Positional arguments on purpose (assuming this is Flask's helper): Flask
    # renamed the `filename` keyword to `path` and later removed the old name,
    # so the positional form works on both old and new releases.
    return send_from_directory(dir_path, filename)
def download_activity_guide():
    """Send the WordCounter activity guide PDF for the current language.

    The file is looked up under
    ``<base dir>/databasic/static/files/activity-guides/<g.current_lang>``.

    :return: the result of ``send_from_directory`` for that file
    """
    filename = "WordCounter Activity Guide.pdf"
    # NOTE(review): g.current_lang is joined into the directory as-is - confirm
    # it is always restricted to known language codes.
    dir_path = os.path.join(get_base_dir(), 'databasic', 'static', 'files',
                            'activity-guides', g.current_lang)
    logger.debug("download activity guide from %s/%s", dir_path, filename)
    # Positional arguments on purpose (assuming this is Flask's helper): Flask
    # renamed the `filename` keyword to `path` and later removed the old name,
    # so the positional form works on both old and new releases.
    return send_from_directory(dir_path, filename)
def download_user_template():
    """Send the ``ctd-template.csv`` user template for the current language.

    The file is looked up under
    ``<base dir>/databasic/static/files/user-templates/<g.current_lang>``.

    :return: the result of ``send_from_directory`` for that file
    """
    filename = "ctd-template.csv"
    # NOTE(review): g.current_lang is joined into the directory as-is - confirm
    # it is always restricted to known language codes.
    dir_path = os.path.join(get_base_dir(), 'databasic', 'static', 'files',
                            'user-templates', g.current_lang)
    logger.debug("download user template from %s/%s", dir_path, filename)
    # Positional arguments on purpose (assuming this is Flask's helper): Flask
    # renamed the `filename` keyword to `path` and later removed the old name,
    # so the positional form works on both old and new releases.
    return send_from_directory(dir_path, filename)