def main():
    """
    Collect and write translation triangles for every 3-combination of
    Wiktionary codes.

    Command line arguments (read from argv):
      - argv[1]: name and path of the configuration file
      - argv[2:]: optional Wiktionary codes used as a filter; only
        combinations containing at least one of them are processed.
        The single word "all" (or no argument) disables filtering.

    The list of Wiktionary codes is read, one code per line, from the file
    named by the "wikicodes" option of the "general" configuration section.
    Progress is written to stderr as "<index>/<total><combination>".
    """
    if len(argv) > 2 and argv[2] != "all":
        filter_wc = set(wc.strip() for wc in argv[2:])
    else:
        filter_wc = None
    cfg_fn = argv[1]
    cfg = ConfigHandler("general", cfg_fn)
    logger = LogHandler(cfg)
    with open(cfg["wikicodes"]) as wc_f:
        wikicodes = set(w.strip() for w in wc_f)
    # A blank (e.g. trailing) line must not become an empty Wiktionary code.
    wikicodes.discard("")

    n = len(wikicodes)
    if filter_wc:
        # m = number of codes outside the filter.  Only filter codes that
        # are really present in the code list count, otherwise the total
        # below would be wrong (m could even become negative).
        m = n - len(filter_wc & wikicodes)
    else:
        m = 0
    # C(n, 3) combinations in total, minus the C(m, 3) ones that contain no
    # filtered code at all.  Floor division keeps the total an integer.
    num_of_tr = n * (n - 1) * (n - 2) // 6 - m * (m - 1) * (m - 2) // 6

    i = 1
    for triangle_wc in combinations(wikicodes, 3):
        if filter_wc and filter_wc.isdisjoint(triangle_wc):
            continue
        stderr.write(str(i) + "/" + str(num_of_tr) + repr(triangle_wc) + "\n")
        i += 1
        logger.info(" ".join(triangle_wc) + " triangle")
        triangulator = Triangulator(triangle_wc, cfg_fn)
        triangulator.collect_triangles()
        triangulator.write_triangles()
def main():
    """
    Collect and write translation triangles for every 3-combination of
    Wiktionary codes.

    Command line arguments (read from argv):
      - argv[1]: name and path of the configuration file
      - argv[2:]: optional Wiktionary codes used as a filter; only
        combinations containing at least one of them are processed.
        The single word 'all' (or no argument) disables filtering.

    The list of Wiktionary codes is read, one code per line, from the file
    named by the 'wikicodes' option of the 'general' configuration section.
    Progress is written to stderr as '<index>/<total><combination>'.
    """
    if len(argv) > 2 and argv[2] != 'all':
        filter_wc = set(wc.strip() for wc in argv[2:])
    else:
        filter_wc = None
    cfg_fn = argv[1]
    cfg = ConfigHandler("general", cfg_fn)
    logger = LogHandler(cfg)
    with open(cfg['wikicodes']) as wc_f:
        wikicodes = set(w.strip() for w in wc_f)
    # A blank (e.g. trailing) line must not become an empty Wiktionary code.
    wikicodes.discard('')

    n = len(wikicodes)
    if filter_wc:
        # m = number of codes outside the filter.  Only filter codes that
        # are really present in the code list count, otherwise the total
        # below would be wrong (m could even become negative).
        m = n - len(filter_wc & wikicodes)
    else:
        m = 0
    # C(n, 3) combinations in total, minus the C(m, 3) ones that contain no
    # filtered code at all.  Floor division keeps the total an integer.
    num_of_tr = n * (n-1) * (n-2) // 6 - m * (m-1) * (m-2) // 6

    i = 1
    for triangle_wc in combinations(wikicodes, 3):
        if filter_wc and filter_wc.isdisjoint(triangle_wc):
            continue
        stderr.write(str(i) + '/' + str(num_of_tr) + repr(triangle_wc) + '\n')
        i += 1
        logger.info(' '.join(triangle_wc) + ' triangle')
        triangulator = Triangulator(triangle_wc, cfg_fn)
        triangulator.collect_triangles()
        triangulator.write_triangles()
def __init__(self, triangle_wc, cfg_fn):
    """
    Prepare the containers for one combination of Wiktionary codes and
    load its configuration and word pairs.

    @param triangle_wc: iterable of the Wiktionary codes of the triangle
    @param cfg_fn: name and path of the configuration file
    """
    self.wikicodes = set(triangle_wc)
    self.cfg_general = ConfigHandler("general", cfg_fn)
    self.log_handler = LogHandler(self.cfg_general)

    # pairs is indexed by four keys; every missing level is created on
    # first access and the innermost value is a list.
    def _fourth_level():
        return defaultdict(list)

    def _third_level():
        return defaultdict(_fourth_level)

    def _second_level():
        return defaultdict(_third_level)

    self.pairs = defaultdict(_second_level)
    self.triangles = defaultdict(list)

    self.read_three_configs(cfg_fn)
    self.read_pairs_in_three_langs()
def __init__(self, wc, cfg_fn):
    """
    Set up the handler of one Wiktionary edition: configuration, logging,
    article parser and the location of the dump file.

    @param wc: Wiktionary code
    @param cfg_fn: name and path of the configuration file
    """
    self.wc = wc
    self.cfg = ConfigHandler(wc, cfg_fn)
    self.log_handler = LogHandler(self.cfg)
    self.init_parser_of_type()

    # The dump lives at <dumpdir>/<fullname>/<wc>wiktionary.txt
    dump_dir = self.cfg['dumpdir']
    edition_name = self.cfg['fullname']
    dump_file = self.wc + 'wiktionary.txt'
    self.dump_path = '/'.join((dump_dir, edition_name, dump_file))
def __init__(self, wc, cfg_fn):
    """
    Set up the handler of one Wiktionary edition: configuration, logging,
    article parser and the location of the dump file.

    Errors raised during set-up are logged and not propagated; attributes
    assigned after the failing step are left unset in that case.

    @param wc: Wiktionary code
    @param cfg_fn: name and path of the configuration file
    """
    self.wc = wc

    def report(message):
        # self.log_handler does not exist yet when ConfigHandler or
        # LogHandler itself is what failed; fall back to the standard
        # "wikt2dict" logger so the real error is reported instead of
        # being masked by an AttributeError.
        handler = getattr(self, 'log_handler', None)
        if handler is None:
            import logging
            handler = logging.getLogger('wikt2dict')
        handler.error(message)

    try:
        self.cfg = ConfigHandler(wc, cfg_fn)
        self.log_handler = LogHandler(self.cfg)
        self.init_parser_of_type()
        self.dump_path = (self.cfg['dumpdir'] + '/' + self.cfg['fullname'] +
                          '/' + self.wc + 'wiktionary.txt')
    except KeyError as e:
        # Portable equivalent of the Python 2 only e.message: the single
        # constructor argument, or an empty string otherwise.
        missing = e.args[0] if len(e.args) == 1 else ''
        report(str(missing) + " parameter must be defined in config file ")
    except NoSectionError:
        report("Section not defined " + str(wc))
    except Exception as e:
        report("Unknown error " + str(e))
class Wiktionary(object):
    """ A class for handling one edition of Wiktionary """

    def __init__(self, wc, cfg_fn):
        """
        Set up configuration, logging, the article parser and the location
        of the dump file.

        Errors raised during set-up are logged and not propagated;
        attributes assigned after the failing step are left unset in that
        case.

        @param wc: Wiktionary code
        @param cfg_fn: name and path of the configuration file
        """
        self.wc = wc
        try:
            self.cfg = ConfigHandler(wc, cfg_fn)
            self.log_handler = LogHandler(self.cfg)
            self.init_parser_of_type()
            self.dump_path = (self.cfg['dumpdir'] + '/' +
                              self.cfg['fullname'] + '/' +
                              self.wc + 'wiktionary.txt')
        except KeyError as e:
            # Portable equivalent of the Python 2 only e.message: the
            # single constructor argument, or an empty string otherwise.
            missing = e.args[0] if len(e.args) == 1 else ''
            self._log_error(str(missing) +
                            " parameter must be defined in config file ")
        except NoSectionError:
            self._log_error("Section not defined " + str(wc))
        except Exception as e:
            self._log_error("Unknown error " + str(e))

    def _log_error(self, message):
        """ Log an error even if log_handler has not been created yet """
        # log_handler is missing when ConfigHandler or LogHandler itself
        # raised; use the standard "wikt2dict" logger then, so the real
        # error is not masked by an AttributeError.
        handler = getattr(self, 'log_handler', None)
        if handler is None:
            import logging
            handler = logging.getLogger('wikt2dict')
        handler.error(message)

    def init_parser_of_type(self):
        """
        Initialize the appropriate parser specified in the configuration
        file ('parser_type' option: 'default' or 'langnames').
        Any other value leaves article_parser untouched.
        """
        type_ = self.cfg['parser_type']
        if type_ == 'default':
            self.article_parser = DefaultArticleParser(self)
        elif type_ == 'langnames':
            self.article_parser = ArticleParserWithLangnames(self)

    def set_parser(self, parser):
        """ Replace the article parser with the given one """
        self.article_parser = parser

    def read_dump(self):
        """
        Iterate through dump and yield each article as a tuple of its
        title and text, both as unicode strings.

        A line starting with the page separator carries the title of the
        article whose text follows.  A titled page without text in the
        middle of the dump is skipped.  Whatever is pending at the end of
        the file is always yielded, so an empty dump yields one pair of
        empty strings.
        """
        import io

        page_sep = u'%%#PAGE'
        # Strip exactly the ASCII whitespace, as stripping the undecoded
        # line did before.
        ascii_ws = u' \t\n\r\x0b\x0c'
        title = u''
        chunks = []  # lines of the current article, joined when yielded
        # newline='\n' splits lines only at '\n' and returns them
        # untranslated.  The with-block closes the file even when the
        # consumer abandons the generator or decoding fails.
        with io.open(self.dump_path, encoding='utf8', newline='\n') as txt_f:
            for line in txt_f:
                if not line.startswith(page_sep):
                    chunks.append(line)
                    continue
                next_title = line.rpartition(page_sep)[2].strip(ascii_ws)
                if chunks and title:
                    finished = (title, u''.join(chunks))
                    chunks = []
                    title = next_title
                    yield finished
                else:
                    # Nothing complete to emit yet: only the title changes,
                    # already collected text stays with the next article.
                    title = next_title
        yield (title, u''.join(chunks))

    def parse_all_articles(self):
        """ Calling parse_article for each article """
        for article in self.read_dump():
            self.article_parser.parse_article(article)

    def write_pairs(self):
        """ Writing the extracted translations to file """
        self.article_parser.write_word_pairs_to_file()
# app.secret_key = os.urandom(12) app.config['MAIL_SERVER'] = 'smtp.sendgrid.net' app.config['MAIL_PORT'] = '465' app.config['MAIL_USE_SSL'] = 'True' app.config['MAIL_USERNAME'] = '******' app.config['PASSWORD'] = '******'.format(os.getenv('emailapikey')) app.config['MAIL_DEFAULT_SENDER'] = '*****@*****.**' app.config['SESSION_TYPE'] = 'redis' app.config['SESSION_REDIS'] = Redis('192.168.5.75') app.config['SECRET_KEY'] = os.getenv('sessionkey') logger = Logger("BillTrakCore") logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logger.info) hdlr = LogHandler() logger.addHandler(hdlr) mail = Mail(app) Session().init_app(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_host=1, x_proto=1) # @app.before_request # def before_request(): # if not request.is_secure: # url = request.url.replace("http://", "https://", 1) # # code = 301 # return redirect(url) @app.context_processor def inject_user():