Ejemplo n.º 1
0
def main():
    """Run the configured entry module, repeating while a reload is requested.

    The entry module name comes from the module-level ``_zip_main``; a
    trailing ``'-dev'`` suffix is stripped. On each pass the module is
    imported (or reloaded when it is already loaded), its optional ``init()``
    is called, and then its ``main()``. Another pass happens only if
    something set ``sys.FG_RELOAD`` to a true value during the current one.
    """
    # Function-scope import keeps the block self-contained.
    import importlib

    modulename = _zip_main[:-4] if _zip_main.endswith('-dev') else _zip_main
    sys.FG_RELOAD = True
    while sys.FG_RELOAD:
        # Clear the flag first so the loop stops unless this pass re-arms it.
        sys.FG_RELOAD = False
        # ``sys`` provides no ``reload``; reloading also needs a module
        # object rather than a name, so look it up (or import it) first.
        loaded = sys.modules.get(modulename)
        if loaded is None:
            m = importlib.import_module(modulename)
        else:
            m = importlib.reload(loaded)
        if hasattr(m, 'init'):
            m.init()
        m.main()
Ejemplo n.º 2
0
import requests
import datetime
import re
import time
import random
import sys

# Python 3 ships the URL helpers as ``urllib.parse``; on Python 2 they live on
# ``urllib`` itself, so bind whichever one is available to the name ``parse``.
try:
    from urllib import parse as parse

except ImportError:
    import urllib as parse

    # Only reached on Python 2. ``reload`` is a builtin there (``sys`` has no
    # ``reload``); reloading ``sys`` makes ``setdefaultencoding`` available
    # again so the default codec can be switched to UTF-8.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')


def init_cookies():
    """
    return the cookies after your first visit
    """
    headers = {
        'Upgrade-Insecure-Requests': '1',
        'Host': 'm.lagou.com',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'DNT': '1',
        'Cache-Control': 'max-age=0',
        'Referrer Policy': 'no-referrer-when-downgrade',
Ejemplo n.º 3
0
#-*- encoding:utf-8 -*-
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import sys
# Best-effort Python 2 setup: ``reload`` is a builtin there (``sys`` has no
# ``reload``); reloading ``sys`` makes ``setdefaultencoding`` available again
# so the default codec can be switched to UTF-8.
try:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')
except (NameError, AttributeError):
    # Python 3: there is no builtin ``reload`` and no ``setdefaultencoding``;
    # nothing to do because the default encoding is already UTF-8.
    pass

import mlstudiosdk.modules
import mlstudiosdk.solution_gallery
from mlstudiosdk.modules.components.component import LComponent
from mlstudiosdk.modules.components.settings import Setting
from mlstudiosdk.modules.components.utils.orange_table_2_data_frame import table2df
from mlstudiosdk.modules.algo.data import Domain, Table
from mlstudiosdk.modules.algo.evaluation import Results
from mlstudiosdk.modules.algo.data.variable import DiscreteVariable, ContinuousVariable
from mlstudiosdk.modules.utils.itemlist import MetricFrame
import warnings

import jieba.posseg as pseg
import codecs
import os
import re

# NOTE(review): ``util`` is imported as a plain top-level module here; the
# original comment says this was changed for testing. The packaged import is
# kept below, commented out -- confirm which one should ship.
import util  # modified for testing
# import mlstudiosdk.modules.components.nlp.Text_Extract_Keywords_Util as util
# Install a global "ignore" filter so warnings raised after this point are
# suppressed.
warnings.filterwarnings('ignore')

Ejemplo n.º 4
0
import requests
from requests import Session

# Append this file's parent directory to sys.path before the ``util`` imports
# below -- presumably that is where the ``util`` package lives; verify.
# NOTE(review): relies on ``sys`` and ``os`` being imported earlier in the
# file; those imports are not visible in this excerpt.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)))
from util.excel_helper import mkdirs_if_not_exists
from util.file_reader import parse_job_xml
from util import log

# Python 3 ships the URL helpers as ``urllib.parse``; on Python 2 they live on
# ``urllib`` itself, so bind whichever one is available to the name ``parse``.
try:
    from urllib import parse as parse

except ImportError:
    import urllib as parse

    # Only reached on Python 2. ``reload`` is a builtin there (``sys`` has no
    # ``reload``); reloading ``sys`` makes ``setdefaultencoding`` available
    # again so the default codec can be switched to UTF-8.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')


def init_cookies():
    """
    return the cookies after your first visit
    """
    headers = {
        'Upgrade-Insecure-Requests': '1',
        'Host': 'm.lagou.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'DNT': '1',
        'Cache-Control': 'max-age=0',
        'Referrer Policy': 'no-referrer-when-downgrade',
    Simple harvester for Dutch last names.

    Data-source:
        http://www.meertens.knaw.nl/nfb/

    :copyright: (c) 2016 Koninklijke Bibliotheek, by Willem-Jan Faber.
    :license: GPLv3
    https://github.com/KBNLresearch/Narralyzer/blob/master/licence.txt
"""

import lxml.html
import sys
import urllib.request

# ``reload(sys)`` followed by ``sys.setdefaultencoding`` is a Python 2-only
# idiom: ``reload`` is a builtin there (``sys`` has no ``reload``), and
# ``setdefaultencoding`` does not exist on Python 3, where the default
# encoding is already UTF-8. Guard it so importing this module on Python 3
# does not raise AttributeError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')


# Name-list query URL; carries a single ``%s`` placeholder in the ``naam``
# query parameter.
BASEURL = "http://www.meertens.knaw.nl/nfb/lijst_namen.php?operator=cn&naam=%s"

# Debug switch, off by default.
DEBUG = False

# The five vowels, kept as a list of one-character strings.
vowels = list('aeiou')

# NOTE(review): presumably page strings that must not be treated as names --
# confirm against the code that consumes this list.
forbidden = [
    'Naam',
    'Centraal Bureau voor Genealogie',
    'KNAW/Meertens Instituut',
]