def compile_arf(corp_id, subcorp_path, attr, logfile):
    """
    Precalculate ARF data for collocations and wordlists.
    (see freq_calc.build_arf_db)

    If the 'freq' data are not compiled yet, the function first polls
    (at most 20 times, sleeping one second between polls) while
    freq_calc.calc_is_running() reports a running 'frq' calculation,
    stopping early once the '.frq' file exists. If the file still does
    not exist afterwards, it is compiled via _compile_frq().

    arguments:
    corp_id -- corpus identifier passed to _load_corp
    subcorp_path -- subcorpus path passed to _load_corp
    attr -- an attribute name
    logfile -- path of a file the calculation status is appended to

    returns:
    a dict with the key 'message' ('arf already compiled' or 'OK'); in the
    'OK' case also 'last_log_record' holding the value returned by
    freq_calc.get_log_last_line(logfile)
    """
    # NOTE(review): this module appears to define `compile_arf` again further
    # down; in Python the later definition replaces this one - confirm which
    # one is meant to stay.
    corp = _load_corp(corp_id, subcorp_path)
    if not is_compiled(corp, attr, 'freq'):
        base_path = freq_calc.corp_freqs_cache_path(corp, attr)
        frq_data_file = '%s.frq' % base_path
        # Give a calculation that is reported as running a bounded amount
        # of time to finish before starting our own.
        num_wait = 20
        while num_wait > 0 and freq_calc.calc_is_running(base_path, 'frq'):
            if os.path.isfile(frq_data_file):
                break
            time.sleep(1)
            num_wait -= 1
        if not os.path.isfile(frq_data_file):
            _compile_frq(corp, attr, logfile)
        corp = _load_corp(corp_id, subcorp_path)  # must reopen freq files

    if is_compiled(corp, attr, 'arf'):
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')  # to get proper calculation of total progress
        return {'message': 'arf already compiled'}

    # The log handle is owned by this function. It is not visible here whether
    # stderr_redirector closes the stream it is given, so close it explicitly;
    # this also guarantees the data are flushed before the log is read below.
    with open(logfile, 'a') as log:
        with stderr_redirector(log):
            corp.compile_arf(attr)
    return {'message': 'OK', 'last_log_record': freq_calc.get_log_last_line(logfile)}
def compile_arf(corp_id, subcorp_path, attr, logfile):
    """
    Precalculate ARF data for collocations and wordlists.
    (see freq_calc.build_arf_db)

    When the 'freq' data are missing, a 'frq' calculation reported as
    running by freq_calc.calc_is_running() is given up to 20 one-second
    polls to produce the '.frq' file. If the file does not exist after
    that, _compile_frq() is called and the corpus is loaded again.

    arguments:
    corp_id -- corpus identifier passed to _load_corp
    subcorp_path -- subcorpus path passed to _load_corp
    attr -- an attribute name
    logfile -- path of a file the calculation status is appended to

    returns:
    a dict with the key 'message' ('arf already compiled' or 'OK'); in the
    'OK' case also 'last_log_record' holding the value returned by
    freq_calc.get_log_last_line(logfile)
    """
    corp = _load_corp(corp_id, subcorp_path)
    if not is_compiled(corp, attr, 'freq'):
        base_path = freq_calc.corp_freqs_cache_path(corp, attr)
        frq_data_file = '%s.frq' % base_path
        # Bounded wait for a calculation that is reported as running.
        num_wait = 20
        while num_wait > 0 and freq_calc.calc_is_running(base_path, 'frq'):
            if os.path.isfile(frq_data_file):
                break
            time.sleep(1)
            num_wait -= 1
        if not os.path.isfile(frq_data_file):
            _compile_frq(corp, attr, logfile)
        corp = _load_corp(corp_id, subcorp_path)  # must reopen freq files

    if is_compiled(corp, attr, 'arf'):
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')  # to get proper calculation of total progress
        return {'message': 'arf already compiled'}

    # Open the log in its own `with` so the handle is closed (and flushed)
    # deterministically before get_log_last_line() reads the file; whether
    # stderr_redirector closes the stream it receives cannot be seen here.
    with open(logfile, 'a') as log:
        with stderr_redirector(log):
            corp.compile_arf(attr)
    return {'message': 'OK', 'last_log_record': freq_calc.get_log_last_line(logfile)}
def compile_docf(user_id, corp_id, subcorp, attr, logfile):
    """
    Precalculate document counts data for collocations and wordlists.
    (see freq_calc.build_arf_db)

    The document structure is taken from the corpus configuration value
    'DOCSTRUCTURE'.

    arguments:
    user_id -- user identifier passed to _load_corp
    corp_id -- corpus identifier passed to _load_corp
    subcorp -- subcorpus passed to _load_corp
    attr -- an attribute name
    logfile -- path of a file the calculation status is appended to

    returns:
    a dict with the key 'message' ('docf already compiled' or 'OK'); in the
    'OK' case also 'last_log_record' holding the value returned by
    freq_calc.get_log_last_line(logfile)

    raises:
    WorkerTaskException -- if manatee.AttrNotFound is raised while looking
                           up the structure or compiling the data
    """
    # NOTE(review): this module appears to define `compile_docf` again further
    # down with a different signature (no user_id); in Python the later
    # definition replaces this one - confirm which one is meant to stay.
    corp = _load_corp(corp_id, subcorp, user_id)
    if is_compiled(corp, attr, 'docf'):
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')  # to get proper calculation of total progress
        return {'message': 'docf already compiled'}

    doc_struct = corp.get_conf('DOCSTRUCTURE')
    try:
        doc = corp.get_struct(doc_struct)
        # Close the log handle explicitly: it is not visible here whether
        # stderr_redirector closes the stream it is given, and the captured
        # output must be flushed before the '100 %' record is appended
        # through a second handle.
        with open(logfile, 'a') as log:
            with stderr_redirector(log):
                corp.compile_docf(attr, doc.name)
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')
        return {'message': 'OK', 'last_log_record': freq_calc.get_log_last_line(logfile)}
    except manatee.AttrNotFound:
        raise WorkerTaskException(
            'Failed to compile docf: attribute {}.{} not found in {}'.format(
                doc_struct, attr, corp_id))
def _compile_frq(corp, attr, logfile):
    """
    Generate pre-calculated data for frequency distribution pages.

    arguments:
    corp -- a manatee.Corpus instance
    attr -- an attribute name
    logfile -- a file where calculation status will be written
               (bonito-open approach)

    returns:
    a dict with the key 'message' ('freq already compiled' or 'OK'); in the
    'OK' case also 'last_log_record' holding the value returned by
    freq_calc.get_log_last_line(logfile)
    """
    if is_compiled(corp, attr, 'freq'):
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')  # to get proper calculation of total progress
        return {'message': 'freq already compiled'}

    # The log handle is opened in its own `with` so that it is closed and
    # flushed before the log is read below; whether stderr_redirector closes
    # the stream it receives cannot be seen from here.
    with open(logfile, 'a') as log:
        with stderr_redirector(log):
            corp.compile_frq(attr)
    return {'message': 'OK', 'last_log_record': freq_calc.get_log_last_line(logfile)}
def compile_docf(corp_id, subcorp_path, attr, logfile):
    """
    Precalculate document counts data for collocations and wordlists.
    (see freq_calc.build_arf_db)

    The document structure is taken from the corpus configuration value
    'DOCSTRUCTURE'.

    arguments:
    corp_id -- corpus identifier passed to _load_corp
    subcorp_path -- subcorpus path passed to _load_corp
    attr -- an attribute name
    logfile -- path of a file the calculation status is appended to

    returns:
    a dict with the key 'message' ('docf already compiled' or 'OK'); in the
    'OK' case also 'last_log_record' holding the value returned by
    freq_calc.get_log_last_line(logfile)

    raises:
    WorkerTaskException -- if manatee.AttrNotFound is raised while looking
                           up the structure or compiling the data
    """
    corp = _load_corp(corp_id, subcorp_path)
    if is_compiled(corp, attr, 'docf'):
        with open(logfile, 'a') as f:
            f.write('\n100 %\n')  # to get proper calculation of total progress
        return {'message': 'docf already compiled'}

    doc_struct = corp.get_conf('DOCSTRUCTURE')
    try:
        doc = corp.get_struct(doc_struct)
        # Close the log handle explicitly so the captured output is flushed
        # before get_log_last_line() reads the file; it is not visible here
        # whether stderr_redirector closes the stream it is given.
        with open(logfile, 'a') as log:
            with stderr_redirector(log):
                corp.compile_docf(attr, doc.name)
        return {'message': 'OK', 'last_log_record': freq_calc.get_log_last_line(logfile)}
    except manatee.AttrNotFound:
        raise WorkerTaskException('Failed to compile docf: attribute %s.%s not found in %s' % (
            doc_struct, attr, corp_id))