Exemple #1
0
def allCategory(url):
    """Scrape the job-category sidebar at ``url`` and store every link found.

    The page is fetched with a ``requests`` session carrying the module-level
    ``headers`` and parsed with ``etree.HTML``.  For every anchor matched
    beneath a sidebar block, one ``(category_name, category_url)`` row is
    inserted into ``jobCategories`` through ``dbHandle().insert_db``.

    Nothing is inserted when the response status is not 200 or the body
    cannot be parsed into a tree.  Anchors lacking a text node or an ``href``
    are skipped instead of aborting the whole run.

    :param url: address of the page that contains the category sidebar.
    :return: ``None``.
    """
    s = requests.session()
    s.headers.update(headers)
    db = dbHandle()
    r = s.get(url)
    if r.status_code != 200:
        # Previously execution fell through to an unbound ``obj`` (NameError).
        return
    r.encoding = 'utf-8'
    obj = etree.HTML(r.text)
    if obj is None:
        return

    # Top-level job category blocks in the sidebar
    jobCategory = r'//*[@id="sidebar"]/div//div'

    # Detailed sub-category links inside one category block
    detailCategory = r'./div[2]//dl/dd//a'

    # Job title (anchor text)
    jobTitle = r'./text()'

    # Job URL (anchor href)
    jobUrl = r'./@href'

    for category in obj.xpath(jobCategory):
        # Evaluate the detail XPath once; an empty result simply skips the loop.
        for job in category.xpath(detailCategory):
            titles = job.xpath(jobTitle)
            urls = job.xpath(jobUrl)
            if not titles or not urls:
                continue
            # NOTE(review): insert_db's signature is not visible here, so the
            # statement is still built as text; a single quote in a title
            # would break it.  Switch to bound parameters if insert_db
            # supports them.
            sql = """insert into jobCategories(category_name,category_url) values('{0}','{1}')""".format(
                titles[0], urls[0])
            db.insert_db(sql)
Exemple #2
0
    def __init__(self):
        """Create the database handle, the HTTP session and the position-API URL."""
        # Database handle first, then the HTTP session with the shared headers.
        self.db = dbHandle()
        session = requests.session()
        session.headers.update(headers)
        self.s = session
        # Endpoint used for position queries; the city is fixed in the query string.
        self.post_url = "https://www.lagou.com/jobs/positionAjax.json?city=%E6%B7%B1%E5%9C%B3&needAddtionalResult=false&isSchoolJob=0"
Exemple #3
0
    def ChangeRepl(self, _content):
        """Handle a replication-change task described by ``_content``.

        ``_content[0]`` is the group name, ``_content[1]`` the region and
        ``_content[-1]`` the task type:

        * ``'dow'`` - look up the master for the region/group three times,
          one second apart, probing it with ``RetryConn`` each time.  If the
          last probe succeeded, a watch is re-created for that host;
          otherwise the result of ``__change_new_master`` is returned.
        * ``'up'`` - only register the watch via ``__up_watch_master``.

        Any other task type does nothing and still reports success.

        :param _content: indexable task description (see above).
        :return: ``True`` on success, ``False`` if any step raised; for a
            failed ``'dow'`` probe, whatever ``__change_new_master`` returns.
        """
        try:
            # ``task_type`` rather than ``type`` so the builtin is not shadowed.
            groupname, region, task_type = (_content[0], _content[1],
                                            _content[-1])
            if task_type == 'dow':  # outage task: re-pick a node and watch it
                # NOTE(review): only the result of the final probe is used;
                # confirm whether an early exit on success was intended.
                for _ in range(3):
                    host, port = self.__get_master_for_region(
                        region, groupname)
                    with closing(dbHandle(host, port)) as dbhandle:
                        # Check whether a normal connection can be made.
                        mysqlstate = dbhandle.RetryConn()
                    time.sleep(1)
                if mysqlstate:
                    # Re-create the master watch.
                    zkHander().CreateWatch(
                        host=host.replace('.', '-'),
                        addition=True,
                        region=region,
                        region_for_groupname=groupname)
                else:
                    return self.__change_new_master(region=region,
                                                    groupname=groupname)

            elif task_type == 'up':  # watch only: replication was added manually
                self.__up_watch_master(region=region, groupname=groupname)
            return True
        except Exception:
            # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            Logging(msg='addition task failed!', level='error')
            return False
Exemple #4
0
def dbInit():
    """Clear ``survey.db`` and reload it from the CSV seed files.

    Loads, in order, classes from ``courses.csv``, users from
    ``passwords.csv`` and enrolments from ``enrolments.csv``.  Loading is
    best-effort: a row whose insert raises is skipped, but the failure is
    now logged instead of being discarded silently.

    :return: ``None``.
    """
    import logging

    log = logging.getLogger(__name__)

    handle = dbHandle('survey.db')
    handle.clearDB()

    courses = getCSVList('courses.csv')
    classIDs = []
    for c in courses:
        cls = formatClass(c)
        try:
            classIDs.append(handle.addClass(cls))
        except Exception:
            # NOTE(review): a skipped class leaves classIDs shorter than
            # courses; verify getClassID does not rely on matching positions.
            log.warning("dbInit: could not add class %r", c, exc_info=True)

    users = getCSVList('passwords.csv')
    for u in users:
        usr = formatUser(u)
        try:
            handle.addUser(usr)
        except Exception:
            log.warning("dbInit: could not add user row %r", u, exc_info=True)

    enrollments = getCSVList('enrolments.csv')
    for e in enrollments:
        # e[0] is passed on as-is; the remaining columns identify the class.
        classID = getClassID(e[1:], courses, classIDs)
        try:
            handle.addEnrollment(e[0], classID)
        except Exception:
            log.warning("dbInit: could not add enrolment %r", e, exc_info=True)

    del handle
Exemple #5
0
 def __init__(self, dfData, dfInfo, aucTime):
     """Store the inputs, parse the data date and run the cleaning setup.

     :param dfData: table whose ``"date"`` column holds ``YYYYMMDD`` strings;
         the first entry becomes ``self.date``.
     :param dfInfo: stored unchanged as ``self.dfInfo``.
     :param aucTime: stored unchanged as ``self.AucTime``.
     """
     self.df = dfData
     first_date = dfData["date"][0]
     self.date = datetime.datetime.strptime(first_date, "%Y%m%d")
     self.dfInfo = dfInfo
     self.db = dbHandle()
     self.AucTime = aucTime
     # Must run last: every attribute above is already set at this point.
     self.initCleanRegulation()
Exemple #6
0
def salary():
    """Run the six salary-bucket queries, print each result, render the page.

    :return: the rendered ``salary.html`` template.
    """
    db = dbHandle()

    # Salary buckets: 0-5, 5-10, 10-15, 15-25, 25-40, 40+
    buckets = (
        ("less 5:{0}", less_5_sql),
        ("less 10:{0}", less_10_sql),
        ("less 15:{0}", less_15_sql),
        ("less 25:{0}", less_25_sql),
        ("less 40:{0}", less_40_sql),
        ("more 40:{0}", more_40_sql),
    )

    # Run every query before printing anything, matching the original order.
    results = [db.query_db(sql) for _, sql in buckets]
    for (template, _), rows in zip(buckets, results):
        print(template.format(rows))

    return render_template('salary.html')
 def __init__(self, dfInfo, date, aucTime):
     """Store the inputs, set up empty state and run the start-up routine.

     :param dfInfo: stored unchanged as ``self.dfInfo``.
     :param date: stored as ``self.timePoint``.
     :param aucTime: stored unchanged as ``self.AucTime``.
     """
     # Caller-supplied values.
     self.dfInfo = dfInfo
     self.timePoint = date
     self.AucTime = aucTime

     # Working state.
     self.timeFilePath = '/'.join((os.getcwd(), 'timeSeriesFile/'))
     self.barDict = {}
     self.splitDict = {}
     self.cycle = [1, 5, 15, 30, 60]
     self.db = dbHandle()

     # Must run last: every attribute above is already set at this point.
     self.initStart()
Exemple #8
0
def getContent(table_name):
    """Return all comments of ``table_name`` segmented into space-separated words.

    Every ``comment_content`` value of the table is concatenated, cut with
    ``jieba.cut(..., cut_all=True)`` and the resulting tokens are joined with
    single spaces.  Rows whose comment is NULL or empty are ignored; a NULL
    used to raise ``TypeError`` during concatenation.

    :param table_name: table to read.  It is interpolated into the SQL text
        because identifiers cannot be bound as parameters, so it must come
        from a trusted source.
    :return: one string of space-separated tokens.
    """
    db = dbHandle()

    # Remove these words from jieba's dictionary before cutting.
    for word in ("电影", "导演", "没有", "影片", "看到"):
        jieba.del_word(word)

    query_sql = "select comment_content from {0}".format(table_name)
    texts = db.query_db(query_sql)

    # Single join instead of repeated ``+=`` on a growing string.
    text = ''.join(row[0] for row in texts if row[0])

    result = jieba.cut(text, cut_all=True)
    return " ".join(result)
Exemple #9
0
    def insert_data(self, data):
        """Store the description text of position ``data`` if none is stored yet.

        The description lines come from ``self.get_job_detail(data)``, are
        joined with newlines and have double quotes replaced by single quotes
        so they fit inside the double-quoted SQL literal.  The update runs
        only when the stored value, taken as the first column of the first
        row returned by ``job_detail_sql``, is NULL.

        :param data: position id; formatted into both the lookup and the
            update statement.
        :return: ``None``.
        """
        db = dbHandle()
        print(data)
        texts = self.get_job_detail(data)
        text = '\n'.join(texts)
        text = text.replace('"', "'")

        # Only write when the description for this position id is still empty.
        job_detail = db.query_db(job_detail_sql.format(data))
        print(job_detail)
        if not job_detail:
            # No row for this id: nothing to update (previously IndexError).
            return
        if job_detail[0][0] is None:
            update_sql = """update position_detail set positionText="{0}" where positionId='{1}'""".format(
                text, data)
            print("正在执行更新语句:{0}".format(update_sql))
            # Execute the statement as built.  It used to be passed through
            # ``.format()`` a second time, which failed whenever the job text
            # contained ``{`` or ``}``.
            db.update_db(update_sql)
Exemple #10
0
                    financeStage = result['financeStage']
                    industryField = result['industryField']
                    companySize = result['companySize']
                    companyLabelList = result['companyLabelList']
                    companyFullName = result['companyFullName']

                    # Build the company INSERT statement.
                    # NOTE(review): values are interpolated straight into the SQL
                    # text, so a quote inside any field would break the statement.
                    company_sql = """insert into company_detail(companyId,city,district,financeStage,industryField,companySize,companyLabelList,companyFullName) \
                    values('{0}','{1}','{2}','{3}','{4}','{5}',"{6}",'{7}')""".format(
                        companyId, city, district, financeStage, industryField,
                        companySize, companyLabelList, companyFullName)

                    print("company_sql:{0}".format(company_sql))

                    # Store the company record.
                    db = dbHandle()
                    company_datas = db.query_db(
                        query_companyid_sql.format(companyId))

                    # Insert only when no row exists for this company id.
                    if len(company_datas) == 0:
                        db.insert_db(company_sql)

                    # Position fields.
                    position_id = result['positionId']
                    position_name = result['positionName']
                    position_workYear = result['workYear']
                    position_education = result['education']
                    position_companyId = result['companyId']
                    position_salary = result['salary']
                    position_Advantage = result['positionAdvantage']
Exemple #11
0
from dbHandle import dbHandle
# Module-level handle for "survey.db", created once at import time.
handle = dbHandle("survey.db")


def authenticator(username, password):
    """Check ``password`` against the stored password of ``username``.

    :param username: name passed to ``handle.getUser``.
    :param password: value compared with the user's ``getPass()`` result.
    :return: the user object when the passwords are equal, otherwise ``-1``.
    """
    myUser = handle.getUser(username)
    # NOTE(review): getUser's result for an unknown name is not visible here.
    # If it is None, report a failed login instead of raising AttributeError
    # on ``getPass``.
    if myUser is None:
        return -1
    if password == myUser.getPass():
        return myUser
    return -1
Exemple #12
0
def education():
    """Query the education statistics, print them and render the result page.

    :return: the rendered template with the rows passed as ``datas``.
    """
    rows = dbHandle().query_db(education_sql)
    print(rows)
    # NOTE(review): this renders 'workyear.html' while year() renders
    # 'education.html'; the template names look swapped. Confirm.
    return render_template('workyear.html', datas=rows)
Exemple #13
0
def year():
    """Query the work-year statistics, print them and render the result page.

    :return: the rendered template with the rows passed as ``datas``.
    """
    rows = dbHandle().query_db(work_year_sql)
    print(rows)
    # NOTE(review): this renders 'education.html' while education() renders
    # 'workyear.html'; the template names look swapped. Confirm.
    return render_template('education.html', datas=rows)
Exemple #14
0
def index():
    """Query the per-category counts, print them and render the first 50 rows.

    :return: the rendered ``data.html`` template with at most 50 rows as
        ``datas``.
    """
    rows = dbHandle().query_db(category_count_sql)
    print(rows)
    first_fifty = rows[:50]
    return render_template('data.html', datas=first_fifty)
Exemple #15
0
 def __init__(self):
     """Create the HTTP session (``self.s``) and the database handle (``self.db``)."""
     session = requests.session()
     self.s = session
     self.db = dbHandle()