Example #1
0
def main():
    """Triangulate every combination of three Wiktionary codes.

    Command line arguments (read from ``argv``):
        argv[1]  -- name and path of the configuration file
        argv[2:] -- optional Wiktionary codes; when present (and the first
                    one is not "all") only triples containing at least one
                    of these codes are processed

    For every selected triple a progress line is written to ``stderr``, then
    a ``Triangulator`` collects and writes the triangles.
    """
    if len(argv) > 2 and argv[2] != "all":
        filter_wc = set(wc.strip() for wc in argv[2:])
    else:
        filter_wc = None
    cfg_fn = argv[1]
    cfg = ConfigHandler("general", cfg_fn)
    logger = LogHandler(cfg)
    with open(cfg["wikicodes"]) as wc_f:
        # Ignore blank lines so that e.g. a trailing newline does not turn
        # into an empty Wiktionary code.
        wikicodes = set(w.strip() for w in wc_f if w.strip())
    n = len(wikicodes)
    # m = number of codes that are NOT selected by the filter.  Taking the
    # set difference keeps the count right even when the command line names
    # a code that is missing from the wikicodes file.
    m = len(wikicodes - filter_wc) if filter_wc else 0
    # C(n, 3) - C(m, 3): triples that contain at least one selected code.
    # Floor division keeps the total an integer on Python 2 and 3 alike.
    num_of_tr = n * (n - 1) * (n - 2) // 6 - m * (m - 1) * (m - 2) // 6
    i = 1
    for triangle_wc in combinations(wikicodes, 3):
        if filter_wc and not filter_wc.intersection(triangle_wc):
            continue
        stderr.write(str(i) + "/" + str(num_of_tr) + repr(triangle_wc) + "\n")
        i += 1
        logger.info(" ".join(triangle_wc) + " triangle")
        triangulator = Triangulator(triangle_wc, cfg_fn)
        triangulator.collect_triangles()
        triangulator.write_triangles()
Example #2
0
def main():
    """Run triangulation over all triples of Wiktionary codes.

    ``argv[1]`` is the configuration file.  Any further arguments are
    Wiktionary codes used as a filter: unless the first of them is 'all',
    a triple is only processed when it contains at least one of them.

    Progress ("<index>/<total><triple>") goes to ``stderr``; each processed
    triple is also logged before its ``Triangulator`` is run.
    """
    if len(argv) > 2 and argv[2] != 'all':
        filter_wc = set(wc.strip() for wc in argv[2:])
    else:
        filter_wc = None
    cfg_fn = argv[1]
    cfg = ConfigHandler("general", cfg_fn)
    logger = LogHandler(cfg)
    with open(cfg['wikicodes']) as wc_f:
        # Blank lines would otherwise become an empty-string code.
        wikicodes = set(w.strip() for w in wc_f if w.strip())
    n = len(wikicodes)
    if filter_wc:
        # Count only codes really present in the file; a filter code that
        # is absent from it must not shrink the "unselected" count.
        m = len(wikicodes - filter_wc)
    else:
        m = 0
    # Triples with at least one selected code: C(n, 3) - C(m, 3).
    # '//' keeps the result integral regardless of the Python version.
    num_of_tr = n * (n-1) * (n-2) // 6 - m * (m-1) * (m-2) // 6
    i = 1
    for triangle_wc in combinations(wikicodes, 3):
        if filter_wc and filter_wc.isdisjoint(triangle_wc):
            continue
        stderr.write(str(i) + '/' + str(num_of_tr) + repr(triangle_wc) + '\n')
        i += 1
        logger.info(' '.join(triangle_wc) + ' triangle')
        triangulator = Triangulator(triangle_wc, cfg_fn)
        triangulator.collect_triangles()
        triangulator.write_triangles()
Example #3
0
 def __init__(self, triangle_wc, cfg_fn):
     """Prepare the containers and load the data for one language triple.

     @param triangle_wc: iterable of Wiktionary codes; stored as a set
     @param cfg_fn: name and path of the configuration file
     """
     # Factories for the nested mapping below: four dictionary levels
     # whose innermost values are lists.
     def _leaf_level():
         return defaultdict(list)

     def _third_level():
         return defaultdict(_leaf_level)

     def _second_level():
         return defaultdict(_third_level)

     self.wikicodes = set(triangle_wc)
     general_cfg = ConfigHandler("general", cfg_fn)
     self.cfg_general = general_cfg
     self.log_handler = LogHandler(general_cfg)
     self.pairs = defaultdict(_second_level)
     self.triangles = defaultdict(list)
     self.read_three_configs(cfg_fn)
     self.read_pairs_in_three_langs()
Example #4
0
 def __init__(self, wc, cfg_fn):
     """Set up configuration, logging, parser and dump path for one edition.

     @param wc: Wiktionary code
     @param cfg_fn: name and path of the configuration file
     """
     self.wc = wc
     self.cfg = ConfigHandler(wc, cfg_fn)
     self.log_handler = LogHandler(self.cfg)
     self.init_parser_of_type()
     # <dumpdir>/<fullname>/<wc>wiktionary.txt
     path_parts = [
         self.cfg['dumpdir'],
         self.cfg['fullname'],
         self.wc + 'wiktionary.txt',
     ]
     self.dump_path = '/'.join(path_parts)
Example #5
0
 def __init__(self, wc, cfg_fn):
     """Set up configuration, logging, parser and dump path for one edition.

     Configuration problems are logged instead of being raised, so after a
     failure the instance may lack some of its attributes.

     @param wc: Wiktionary code
     @param cfg_fn: name and path of the configuration file
     """
     import logging

     def log_error(msg):
         # ConfigHandler can fail before self.log_handler is assigned;
         # fall back to the standard logger so the real problem is
         # reported instead of an AttributeError raised by the handler.
         handler = getattr(self, 'log_handler', None)
         if handler is None:
             handler = logging.getLogger('wikt2dict')
         handler.error(msg)

     self.wc = wc
     try:
         self.cfg = ConfigHandler(wc, cfg_fn)
         self.log_handler = LogHandler(self.cfg)
         self.init_parser_of_type()
         self.dump_path = (self.cfg['dumpdir'] + '/' + self.cfg['fullname'] + '/' +
                 self.wc + 'wiktionary.txt')
     except KeyError as e:
         # e.args[0] is the missing key; unlike e.message it is available
         # on both Python 2 and Python 3.
         missing = e.args[0] if e.args else e
         log_error(str(missing) +
                   " parameter must be defined in config file ")
     except NoSectionError:
         log_error("Section not defined " + str(wc))
     except Exception as e:
         log_error("Unknown error " + str(e))
Example #6
0
class Wiktionary(object):
    """ A class for handling one edition of Wiktionary """

    # Characters removed by str.strip() on a byte string; used explicitly so
    # that stripping decoded titles does not also remove Unicode-only
    # whitespace such as a no-break space.
    _ASCII_WHITESPACE = u' \t\n\r\x0b\x0c'

    def __init__(self, wc, cfg_fn):
        """ Set up configuration, logging, parser and dump path.

        Configuration problems are logged instead of being raised, so after
        a failure the instance may lack some of its attributes.

        @param wc: Wiktionary code
        @param cfg_fn: name and path of the configuration file
        """
        self.wc = wc
        try:
            self.cfg = ConfigHandler(wc, cfg_fn)
            self.log_handler = LogHandler(self.cfg)
            self.init_parser_of_type()
            self.dump_path = (self.cfg['dumpdir'] + '/' + self.cfg['fullname'] + '/' +
                    self.wc + 'wiktionary.txt')
        except KeyError as e:
            # e.args[0] is the missing key; unlike e.message it exists on
            # both Python 2 and Python 3.
            missing = e.args[0] if e.args else e
            self._log_error(str(missing) +
                            " parameter must be defined in config file ")
        except NoSectionError:
            self._log_error("Section not defined " + str(wc))
        except Exception as e:
            self._log_error("Unknown error " + str(e))

    def _log_error(self, msg):
        """ Log an error even when the project log handler is not set yet """
        import logging

        # ConfigHandler may raise before self.log_handler is assigned; in
        # that case report through the standard logging module.
        handler = getattr(self, 'log_handler', None)
        if handler is None:
            handler = logging.getLogger('wikt2dict')
        handler.error(msg)

    def init_parser_of_type(self):
        """ Initialize the appropriate parser specified in the configuration file

        Any other 'parser_type' value leaves article_parser unset; a parser
        can still be supplied through set_parser().
        """
        type_ = self.cfg['parser_type']
        if type_ == 'default':
            self.article_parser = DefaultArticleParser(self)
        elif type_ == 'langnames':
            self.article_parser = ArticleParserWithLangnames(self)

    def set_parser(self, parser):
        """ Replace the article parser with the given object """
        self.article_parser = parser

    def read_dump(self):
        """ Iterate through dump and yield each article
        as a tuple of its title and text

        A line starting with '%%#PAGE' opens a new article; the rest of
        that line, stripped, is the title.  All other lines are appended to
        the text of the current article.  The last (possibly empty) article
        is always yielded once the file is exhausted.
        """
        import io

        page_sep = u'%%#PAGE'
        this_title = u''
        article_lines = []
        # io.open decodes UTF-8 on both Python 2 and 3.  newline='\n' splits
        # on '\n' only and leaves line endings untouched.  The with-block
        # closes the file even if the consumer abandons the generator.
        with io.open(self.dump_path, encoding='utf8', newline='\n') as txt_f:
            for l in txt_f:
                if not l.startswith(page_sep):
                    article_lines.append(l)
                    continue
                next_title = l.split(page_sep)[-1].strip(self._ASCII_WHITESPACE)
                if article_lines and this_title:
                    finished = (this_title, u''.join(article_lines))
                    this_title = next_title
                    article_lines = []
                    yield finished
                else:
                    # Nothing complete to emit yet: only (re)set the title.
                    this_title = next_title
        yield (this_title, u''.join(article_lines))

    def parse_all_articles(self):
        """ Calling parse_article for each article """
        for article in self.read_dump():
            self.article_parser.parse_article(article)

    def write_pairs(self):
        """ Writing the extracted translations to file """
        self.article_parser.write_word_pairs_to_file()
Example #7
0
# app.secret_key = os.urandom(12)
# Outgoing mail: SendGrid SMTP over SSL on port 465.
app.config['MAIL_SERVER'] = 'smtp.sendgrid.net'
# Port and SSL flag are real int/bool values rather than strings, so a
# later change to 'False' cannot be mistaken for a truthy setting.
app.config['MAIL_PORT'] = 465
app.config['MAIL_USE_SSL'] = True
app.config['MAIL_USERNAME'] = '******'
# NOTE(review): if Mail is Flask-Mail, the password is read from
# 'MAIL_PASSWORD', not 'PASSWORD' - confirm which key is intended.
app.config['PASSWORD'] = '******'.format(os.getenv('emailapikey'))
app.config['MAIL_DEFAULT_SENDER'] = '*****@*****.**'
# Server-side sessions stored in Redis.
app.config['SESSION_TYPE'] = 'redis'
app.config['SESSION_REDIS'] = Redis('192.168.5.75')
app.config['SECRET_KEY'] = os.getenv('sessionkey')
logger = Logger("BillTrakCore")
# The level must be a logging level constant; passing the bound method
# logger.info makes basicConfig raise TypeError.
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%d-%b-%y %H:%M:%S',
                    level=logging.INFO)

hdlr = LogHandler()
logger.addHandler(hdlr)

mail = Mail(app)
Session().init_app(app)
# Use the X-Forwarded-Host / X-Forwarded-Proto headers set by one proxy.
app.wsgi_app = ProxyFix(app.wsgi_app, x_host=1, x_proto=1)
# @app.before_request
# def before_request():
#     if not request.is_secure:
#         url = request.url.replace("http://", "https://", 1)
#         # code = 301
#         return redirect(url)


@app.context_processor
def inject_user():