Exemple #1
0
def crawl(test=False):
    """Scrape AstraZeneca's job board for diploma/thesis openings in Sweden.

    Returns a list of dicts (title, location, link, company); empty list
    when nothing matched.  On an empty result in test mode the browser is
    left open so the page can be inspected.
    """
    browser = browserobject.start_browser("http://jobs.astrazeneca.com", test)
    company = 'Astra Zeneca'

    # Narrow the search: country = Sweden (316), then the
    # "student opportunities" category (1257), then submit.
    for filter_id in ("316", "1257"):
        browser.find_element_by_id(filter_id).click()
        time.sleep(1)
    browser.find_element_by_class_name('submit').click()

    results = []
    for card in browser.find_elements_by_class_name('job-res-description'):
        heading = card.find_element_by_class_name('job-title')
        if 'diploma' not in heading.text.lower():
            continue
        place = card.find_element_by_class_name('locations')
        results.append(dict(title=heading.text,
                            location=place.text,
                            link=heading.get_attribute('href'),
                            company=company))

    if results:
        browser.quit()
        return results
    if not test:
        browser.quit()
    return []
Exemple #2
0
def crawl(test=False):
    """Scrape AstraZeneca's job board for diploma/thesis openings in Sweden.

    Returns a list of dicts (title, location, link, company); empty list
    when nothing matched.  On an empty result in test mode the browser is
    left open so the page can be inspected.
    """
    browser = browserobject.start_browser("http://jobs.astrazeneca.com", test)
    COMPANY = 'Astra Zeneca'

    browser.find_element_by_id("316").click()   # country filter: Sweden
    time.sleep(1)
    browser.find_element_by_id("1257").click()  # category: student opportunities
    time.sleep(1)
    browser.find_element_by_class_name('submit').click()

    postings = browser.find_elements_by_class_name('job-res-description')
    found = []

    for posting in postings:
        anchor = posting.find_element_by_class_name('job-title')
        if 'diploma' in anchor.text.lower():
            place = posting.find_element_by_class_name('locations')
            found.append({
                'title': anchor.text,
                'location': place.text,
                'link': anchor.get_attribute('href'),
                'company': COMPANY,
            })

    if found:
        browser.quit()
        return found
    if not test:
        browser.quit()
    return []
Exemple #3
0
def crawl(test=False):
    """Scrape SAAB's Swedish vacancy list for thesis ('examen') openings.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://saabgroup.com/sv/career/job-opportunities/?&c=Sweden", test)

    COMPANY = "SAAB"
    list_of_thesis = []

    table = browser.find_element_by_class_name('vacancies')

    for row in table.find_elements_by_tag_name('li'):
        # 'examen' (Swedish) marks thesis/graduation projects.
        if 'examen' in row.text.lower():
            title = row.find_element_by_class_name('title')
            location = row.find_element_by_class_name('location')
            link = row.find_element_by_tag_name('a')

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     link=link.get_attribute('href'),
                     company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #4
0
def crawl(test=False):
    """Scrape Tetra Pak's Taleo job list for thesis openings.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "https://tetrapak.taleo.net/careersection/3/jobsearch.ftl?lang=en", test)

    COMPANY = "Tetra Pak"
    list_of_thesis = []

    time.sleep(0.3)  # give the results table time to render
    table = browser.find_element_by_class_name('table')

    for row in table.find_elements_by_tag_name('tr'):
        if 'thesis' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[2]')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #5
0
def crawl(test=False):
    """Scrape the Volvo Group Brassring search results for thesis openings.

    Only rows whose location column mentions Sweden are kept.  The stored
    link is the search-results URL itself rather than the row anchor
    (matching the original, which also used `url` in the dict).

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    url = ('https://xjobs.brassring.com/TGWebHost/searchresults.aspx'
           '?partnerid=25079&siteid=5171&Codes=Volvo&AgentID=9780452'
           '&Function=runquery')
    browser = browserobject.start_browser(url, test)
    COMPANY = "Volvo Group"
    list_of_thesis = []

    time.sleep(0.3)  # give the results table time to render
    table = browser.find_element_by_id('idSearchresults')

    for row in table.find_elements_by_tag_name('tr'):
        if 'thesis' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[5]')
            # BUGFIX: the original skipped rows containing 'sweden'
            # ('in' instead of 'not in'), which is the opposite of every
            # other crawler in this file.
            if 'sweden' not in location.text.lower():
                continue

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     company=COMPANY,
                     link=url))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #6
0
def crawl(test=False):
    """Scrape IFS's job table (inside the 'riframe' iframe) for thesis jobs.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://www.ifsworld.com/se/verksamheten/arbeta-hos-oss/lediga-jobb/apply-for-a-job/", test)

    COMPANY = "IFS"

    browser.switch_to_frame("riframe")  # job table lives inside an iframe
    table = browser.find_element_by_id('jobsTable')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    list_of_thesis = []

    for row in table_rows:
        if 'thesis' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[3]')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #7
0
def crawl(test=False):
    """Scrape GE's job search (pre-filtered on 'thesis'), Swedish rows only.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://jobs.gecareers.com/search?q=thesis", test)

    COMPANY = "General Electric"
    list_of_thesis = []
    table = browser.find_element_by_id('searchresults')
    table_rows = table.find_elements_by_tag_name('tr')[2:]  # skip header rows

    for row in table_rows:
        if 'thesis' in row.text.lower() and 'sweden' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[2]')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #8
0
def crawl(test=False):
    """Scrape Vattenfall's internship listing (the URL pre-filters the search).

    Every row in the results table is collected; no extra keyword filter
    is applied here.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://corporate.vattenfall.se/jobba-hos-oss/jobb/lediga-jobb/?country=Sweden&location=&function=&position=Internship&education=", test)

    COMPANY = "Vattenfall"
    list_of_thesis = []
    table = browser.find_element_by_id('DataTables_Table_0')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    for row in table_rows:
        title = row.find_element_by_tag_name('a')
        location = row.find_element_by_xpath('./td[2]')
        link = title.get_attribute('href')

        list_of_thesis.append(dict(title=title.text,
                                   location=location.text,
                                   link=link,
                                   company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #9
0
def crawl(test=False):
    """Scrape Boliden's careers site; return every parsable listing.

    Listing rows encode '(department|location)' in their FieldValue text;
    the job id is pulled out of the anchor href and appended to the
    JobDetails base URL to build a stable link.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "https://boliden.csod.com/ats/careersite/search.aspx?site=5&c=boliden", test)
    baseurl = "https://boliden.csod.com/ats/careersite/JobDetails.aspx?id="
    COMPANY = 'Boliden'
    job_list = browser.find_elements_by_xpath('''//ul/li''')

    list_of_thesis = []

    for l in job_list:
        try:
            title = l.find_element_by_tag_name('a')
            location_data = l.find_element_by_class_name("FieldValue").text
        except Exception:
            # Not every <li> is a job row; skip those without the expected
            # anchor/FieldValue structure.  (Was a bare `except:`, which
            # also swallowed KeyboardInterrupt/SystemExit.)
            continue
        _, location = re.findall(r'\((.*?)\|(.*?)\)', location_data)[0]

        link_info = title.get_attribute('href')
        match = re.findall(r'.*?id=([^"]*)', link_info)

        list_of_thesis.append(dict(title=title.text,
                                   location=location,
                                   link=baseurl + match[0],
                                   company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #10
0
def crawl(test=False):
    """Scrape BillerudKorsnäs' thesis ('exjobb') listing page.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://www.billerudkorsnas.com/sv/Karriar/Lediga-jobb/Exjobb2/", test)
    COMPANY = 'Billerud'

    rows = browser.find_elements_by_xpath(
        '''//*[@id="primarycontent"]/article/table/tbody/tr''')
    collected = []

    for entry in rows:
        heading = entry.find_element_by_class_name('listTitle')
        place = entry.find_element_by_xpath('''.//td[2]''')
        anchor = entry.find_element_by_tag_name('a')

        collected.append({
            'title': heading.text,
            'location': place.text,
            'link': anchor.get_attribute('href'),
            'company': COMPANY,
        })

    if collected:
        browser.quit()
        return collected
    if not test:
        browser.quit()
    return []
Exemple #11
0
def crawl(test=False):
    """Scrape IFS's job table (inside the 'riframe' iframe) for thesis jobs.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://www.ifsworld.com/se/verksamheten/arbeta-hos-oss/lediga-jobb/apply-for-a-job/",
        test)

    COMPANY = "IFS"

    browser.switch_to_frame("riframe")  # job table lives inside an iframe
    table = browser.find_element_by_id('jobsTable')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    list_of_thesis = []

    for row in table_rows:
        if 'thesis' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[3]')
            link = title.get_attribute('href')

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     link=link,
                     company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #12
0
def crawl(test=False):
    """Scrape Sandvik's thesis ('examensarbete') table.

    NOTE: the page puts its data rows inside <thead>, hence the odd
    traversal.  The 'title' field is taken from the subject column
    (th[2]), matching the original behaviour.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://www.home.sandvik/se/karriar/student/examensarbete/examensarbeten/",
        test)

    COMPANY = "Sandvik"
    list_of_thesis = []

    table = browser.find_element_by_tag_name('thead')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    for row in table_rows:
        subject = row.find_element_by_xpath('./th[2]')
        location = row.find_element_by_xpath('./th[3]')
        link = row.find_element_by_tag_name('a')

        list_of_thesis.append(
            dict(title=subject.text,
                 location=location.text,
                 link=link.get_attribute('href'),
                 company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #13
0
def crawl(test=False):
    """Scrape Göteborgs Energi's thesis page; location is always Gothenburg.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://www.goteborgenergi.se/Om_oss/Karriar/Student/Examensarbete", test)

    COMPANY = "Göteborgs Energi"
    time.sleep(0.3)  # give the content table time to render
    table = browser.find_element_by_class_name('Dx-Content-Table')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    list_of_thesis = []

    for row in table_rows:
        title = row.find_element_by_tag_name('a')
        link = title.get_attribute('href')

        list_of_thesis.append(dict(title=title.text,
                                   location='Gothenburg',
                                   link=link,
                                   company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #14
0
def crawl(test=False):
    """Scrape Scania's job list for thesis ('examen') openings.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://jobsearch.scania.com/segerjoblist/search.aspx", test)

    COMPANY = "Scania"

    table = browser.find_element_by_id('dgSearchResult')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row
    list_of_thesis = []

    for row in table_rows:
        # 'examen' (Swedish) marks thesis/graduation projects.
        if 'examen' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[5]')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #15
0
def crawl(test=False):
    """Scrape ABB's Swedish vacancy list, pre-filtered via the page's
    job-type dropdown (option index 4).

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://new.abb.com/se/jobba-hos-oss/lediga-tjanster", test)
    COMPANY = 'ABB'

    time.sleep(1)  # might need to explicitly wait for website to load

    # Element has to be in view to click -> scroll to it first.
    find_button = browser.find_element_by_class_name("""findButton""")
    filter_selection = browser.find_element_by_xpath(
        """//*[@id="Content_C001_Col00"]/div/div[2]/div[3]/span[4]/div/a/span""")
    browser.execute_script("return arguments[0].scrollIntoView();", find_button)
    filter_selection.click()
    browser.find_element_by_xpath("""//*[@data-option-array-index='4']""").click()
    time.sleep(0.5)  # time for content to load
    job_list = browser.find_elements_by_xpath("""//*[@id="jobOffers"]/tbody/tr""")

    list_of_thesis = []

    for l in job_list:
        title = l.find_element_by_tag_name('a')
        # Second cell is the location; the remaining cells are unused.
        location = l.find_elements_by_tag_name('td')[1]

        list_of_thesis.append(dict(title=title.text,
                                   location=location.text,
                                   link=title.get_attribute('href'),
                                   company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #16
0
def crawl(test=False):
    """Scrape Scania's job list for thesis ('examen') openings.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://jobsearch.scania.com/segerjoblist/search.aspx", test)

    COMPANY = "Scania"

    table = browser.find_element_by_id('dgSearchResult')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row
    list_of_thesis = []

    for row in table_rows:
        # 'examen' (Swedish) marks thesis/graduation projects.
        if 'examen' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[5]')
            link = title.get_attribute('href')

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     link=link,
                     company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #17
0
def crawl(test=False):
    """Scrape SAAB's Swedish vacancy list for thesis ('examen') openings.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://saabgroup.com/sv/career/job-opportunities/?&c=Sweden", test)

    COMPANY = "SAAB"
    list_of_thesis = []

    table = browser.find_element_by_class_name('vacancies')

    for row in table.find_elements_by_tag_name('li'):
        # 'examen' (Swedish) marks thesis/graduation projects.
        if 'examen' in row.text.lower():
            title = row.find_element_by_class_name('title')
            location = row.find_element_by_class_name('location')
            link = row.find_element_by_tag_name('a')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link.get_attribute('href'),
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
def crawl(test=False):
    """Scrape Bombardier's thesis search, keeping only Swedish ('SE') rows.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "https://jobs.bombardier.com/key/final-thesis-bombardier-jobs.html", test)

    COMPANY = "Bombardier"

    table = browser.find_element_by_id('searchresults')
    table_rows = table.find_elements_by_tag_name('tr')[2:]  # skip header rows
    list_of_thesis = []

    for row in table_rows:
        if 'thesis' in row.text.lower():
            location = row.find_element_by_xpath('./td[2]')

            # Location text carries a country code; keep Sweden only.
            if 'SE' not in location.text:
                continue

            title = row.find_element_by_tag_name('a')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #19
0
def crawl(test=False):
    """Scrape Volvo Cars' job-search page for thesis positions.

    The first entry in the result list is the column-label row and the
    last is a "we are sorry..." placeholder, so both are skipped.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://www.volvocars.com/intl/about/our-company/careers/job-search",
        test)
    job_list = browser.find_elements_by_xpath(
        """//*[@id="volvo"]/div[3]/div/div""")
    COMPANY = 'Volvo Cars'

    list_of_thesis = []
    # [1:-1]: drop the label row and the trailing "we are sorry..." row.
    for card in job_list[1:-1]:
        position = card.find_element_by_tag_name('dt')

        if 'thesis' not in position.text.lower():
            continue

        location, app_date = card.find_elements_by_tag_name('dd')
        link_description = card.find_element_by_css_selector(
            'a').get_attribute('href')

        list_of_thesis.append(
            dict(title=position.text,
                 location=location.text,
                 link=link_description,
                 company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    if not test:
        browser.quit()
    return []
Exemple #20
0
def crawl(test=False):
    """Scrape Sweco's vacancy table for 'examen'/'exjobb' positions.

    Clicks the "more jobs" link repeatedly until it fails, so the whole
    table is loaded before scraping.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://www.sweco.se/karriar/lediga-jobb/", test)

    COMPANY = "Sweco"
    list_of_thesis = []

    # Expand the list: keep clicking until the button stops working.
    more_jobs_button = browser.find_element_by_class_name('jobsearchresult__link')
    try:
        while True:
            more_jobs_button.click()
            time.sleep(0.2)  # delay to wait for content to load
    except WebDriverException:
        pass  # all jobs loaded now

    table = browser.find_element_by_class_name('jobsearchresult__table')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # skip header row

    for row in table_rows:
        # Swedish keywords marking thesis/graduation projects.
        if any(word in row.text.lower() for word in ['examen', 'exjobb']):
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[3]')
            link = title.get_attribute('href')

            list_of_thesis.append(dict(title=title.text,
                                       location=location.text,
                                       link=link,
                                       company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #21
0
def crawl(test=False):
    """Scrape SKF's HR-department job search (pre-filtered on 'thesis').

    If the site shows an alert box (e.g. error / no results), its text is
    printed and None is returned (NOTE: other crawlers return [] instead;
    kept as-is for backward compatibility).

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "https://skf.tms.hrdepartment.com/cgi-bin/a/searchjobs_quick.cgi?kand=thesis&country=&qty=25&sj=1&order=jobs.timedate+DESC&search=Search+Jobs",
        test)

    COMPANY = "SKF"
    list_of_thesis = []

    try:
        alert = browser.find_element_by_class_name('alert')
        print(alert.text)
        # BUGFIX: quit the browser before bailing out (it used to leak),
        # and drop the unreachable sys.exit() that followed the return.
        if not test: browser.quit()
        return None
    except NoSuchElementException:
        pass  # no alert -> results present, proceed

    table = browser.find_element_by_tag_name('tbody')
    table_rows = table.find_elements_by_tag_name('tr')

    for row in table_rows:
        if 'thesis' in row.text.lower():
            title = row.find_element_by_xpath('./td[1]')
            location = row.find_element_by_xpath('./td[2]')
            link = title.find_element_by_tag_name('a').get_attribute('href')

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     link=link,
                     company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #22
0
def crawl(test=False):
    """Scrape ABB's Swedish vacancy list, pre-filtered via the page's
    job-type dropdown (option index 4).

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "http://new.abb.com/se/jobba-hos-oss/lediga-tjanster", test)
    COMPANY = 'ABB'

    time.sleep(1)  # might need to explicitly wait for website to load

    # Element has to be in view to click -> scroll to it first.
    find_button = browser.find_element_by_class_name("""findButton""")
    filter_selection = browser.find_element_by_xpath(
        """//*[@id="Content_C001_Col00"]/div/div[2]/div[3]/span[4]/div/a/span"""
    )
    browser.execute_script("return arguments[0].scrollIntoView();",
                           find_button)
    filter_selection.click()
    browser.find_element_by_xpath(
        """//*[@data-option-array-index='4']""").click()
    time.sleep(0.5)  # time for content to load
    job_list = browser.find_elements_by_xpath(
        """//*[@id="jobOffers"]/tbody/tr""")

    list_of_thesis = []

    for l in job_list:
        title = l.find_element_by_tag_name('a')
        # Second cell is the location; the remaining cells are unused.
        location = l.find_elements_by_tag_name('td')[1]

        list_of_thesis.append(
            dict(title=title.text,
                 location=location.text,
                 link=title.get_attribute('href'),
                 company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #23
0
def crawl(test=False):
    """Scrape Ericsson's Swedish job search for thesis positions.

    Results are paginated at 25 per page; when the hit counter shows more
    than 25, the last-page link is followed for a second pass.

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    browser = browserobject.start_browser(
        "https://jobs.ericsson.com/search/?q=&locationsearch=sweden", test)

    COMPANY = "Ericsson"

    # BUGFIX: second_page was left unbound when there were <= 25 hits,
    # raising NameError at the pagination check below.
    second_page = False
    if int(browser.find_element_by_xpath(
            """//*[@id="content"]/div[3]/div/div/span/span[1]/b[2]""").text) > 25:
        second_page = True
    list_of_thesis = []

    table_rows = browser.find_elements_by_css_selector('tr.data-row.clickable')

    for _ in range(2):

        for row in table_rows:
            if 'thesis' in row.text.lower():
                title = row.find_element_by_tag_name('a')
                location = row.find_element_by_xpath('./td[2]')
                link = title.get_attribute('href')

                list_of_thesis.append(dict(title=title.text,
                                           location=location.text,
                                           link=link,
                                           company=COMPANY))

        if second_page:
            browser.find_element_by_class_name('paginationItemLast').click()
            table_rows = browser.find_elements_by_css_selector('tr.data-row.clickable')
        else:
            break

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #24
0
def crawl(test=False):
    """Scrape BillerudKorsnäs' thesis ('exjobb') listing page.

    Returns a list of dicts (title, location, link, company); [] when none.
    On an empty result in test mode the browser is left open for inspection.
    """
    browser = browserobject.start_browser(
        "http://www.billerudkorsnas.com/sv/Karriar/Lediga-jobb/Exjobb2/", test)
    COMPANY = 'Billerud'

    table_rows = browser.find_elements_by_xpath(
        '''//*[@id="primarycontent"]/article/table/tbody/tr''')
    jobs = []

    for row in table_rows:
        name_cell = row.find_element_by_class_name('listTitle')
        loc_cell = row.find_element_by_xpath('''.//td[2]''')
        href = row.find_element_by_tag_name('a').get_attribute('href')

        jobs.append(dict(title=name_cell.text,
                         location=loc_cell.text,
                         link=href,
                         company=COMPANY))

    if jobs:
        browser.quit()
        return jobs
    if not test:
        browser.quit()
    return []
Exemple #25
0
def crawl(test=False):
    """Scrape the Volvo Group Brassring search results for thesis openings.

    Only rows whose location column mentions Sweden are kept.  The stored
    link is the search-results URL itself rather than the row anchor
    (matching the original, which also used `url` in the dict).

    Returns a list of dicts (title, location, link, company); [] when none.
    """
    url = ('https://xjobs.brassring.com/TGWebHost/searchresults.aspx'
           '?partnerid=25079&siteid=5171&Codes=Volvo&AgentID=9780452'
           '&Function=runquery')
    browser = browserobject.start_browser(url, test)
    COMPANY = "Volvo Group"
    list_of_thesis = []

    time.sleep(0.3)  # give the results table time to render
    table = browser.find_element_by_id('idSearchresults')

    for row in table.find_elements_by_tag_name('tr'):
        if 'thesis' in row.text.lower():
            title = row.find_element_by_tag_name('a')
            location = row.find_element_by_xpath('./td[5]')
            # BUGFIX: the original skipped rows containing 'sweden'
            # ('in' instead of 'not in'), which is the opposite of every
            # other crawler in this file.
            if 'sweden' not in location.text.lower():
                continue

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     company=COMPANY,
                     link=url))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #26
0
def crawl(test=False):
    """Scrape the ÅF careers page for thesis ("exjobb") listings.

    Returns a list of dicts (title, location, link, company); [] when the
    category filter cannot be found or nothing matches.

    Args:
        test: when True and no postings were found, the browser is left
              open so a test harness can inspect the page.
    """
    browser = browserobject.start_browser(
        "http://www.afconsult.com/sv/jobba-hos-oss/lediga-jobb/", test)
    COMPANY = 'ÅF'
    list_of_thesis = []
    time.sleep(1)    # might need to explicitly wait for website to load

    browser.find_element_by_id("""dk1-combobox""").click()  # open the category dropdown
    try:
        browser.find_element_by_id("dk1--2024534255__jobFilters").click()

        time.sleep(1)  # wait for the filtered listing to load
        content = browser.find_element_by_id("contentItemListing_6373")
        job_list = content.find_elements_by_css_selector('.block.col.regular-12')

        for job in job_list:
            title = job.find_element_by_css_selector(".col.regular-6")
            if "exjobb" in title.text.lower():
                link = job.find_element_by_tag_name("a")
                location, app_date = job.find_elements_by_css_selector(".col.regular-3")

                list_of_thesis.append(dict(title=title.text,
                                           location=location.text,
                                           link=link.get_attribute('href'),
                                           company=COMPANY))
    except NoSuchElementException:
        # Filter/listing element missing: nothing published right now.
        pass

    if list_of_thesis:
        # BUGFIX: the original returned *before* browser.quit(), making the
        # quit unreachable and leaking a browser process on every
        # successful crawl.
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #27
0
def crawl(test=False):
    """Scrape the Boliden career site (Cornerstone OnDemand) for postings.

    Each list item is expected to hold an anchor plus a "FieldValue"
    element formatted like "(<department>|<location>)"; the posting id is
    pulled out of the anchor href and rebuilt into a stable details URL.

    Returns a list of dicts (title, location, link, company); [] when
    nothing usable is found.

    Args:
        test: when True and no postings were found, the browser is left
              open so a test harness can inspect the page.
    """
    browser = browserobject.start_browser(
        "https://boliden.csod.com/ats/careersite/search.aspx?site=5&c=boliden",
        test)
    baseurl = "https://boliden.csod.com/ats/careersite/JobDetails.aspx?id="
    COMPANY = 'Boliden'
    job_list = browser.find_elements_by_xpath('''//ul/li''')

    list_of_thesis = []

    for item in job_list:
        try:
            title = item.find_element_by_tag_name('a')
            location_data = item.find_element_by_class_name("FieldValue").text
        except Exception:
            # Was a bare "except:"; narrowed so KeyboardInterrupt /
            # SystemExit still propagate.  Non-job <li> elements simply
            # lack these children and are skipped.
            continue

        # Raw strings so the regex escapes aren't mangled by Python.
        loc_match = re.findall(r'\((.*?)\|(.*?)\)', location_data)
        if not loc_match:
            continue  # unexpected format -- skip instead of IndexError
        _department, location = loc_match[0]

        id_match = re.findall(r'.*?id=([^"]*)', title.get_attribute('href'))
        if not id_match:
            continue  # href without an id -- cannot build a details link

        list_of_thesis.append(
            dict(title=title.text,
                 location=location,
                 link=baseurl + id_match[0],
                 company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #28
0
def crawl(test=False):
    """Scrape Sandvik's thesis-project table.

    Reads the table header rows and extracts subject (used as the title),
    location, and the row's anchor link.

    NOTE(review): the table is located via 'thead' and the cells via
    './th[N]' -- unusual for data rows (normally 'tbody'/'td'); presumably
    this matches the actual markup of the page -- confirm before changing.

    Returns a list of dicts (title, location, link, company); [] when the
    table has no data rows.

    Args:
        test: when True and no postings were found, the browser is left
              open so a test harness can inspect the page.
    """
    browser = browserobject.start_browser("http://www.home.sandvik/se/karriar/student/examensarbete/examensarbeten/", test)

    COMPANY = "Sandvik"
    list_of_thesis = []

    table = browser.find_element_by_tag_name('thead')
    table_rows = table.find_elements_by_tag_name('tr')[1:]  # [0] is the label row

    for row in table_rows:
        # Only the cells actually used are looked up; the original also
        # fetched './th[1]' (title) and './th[4]' (date) and discarded
        # them, which could only add spurious NoSuchElementException risk.
        subject = row.find_element_by_xpath('./th[2]')
        location = row.find_element_by_xpath('./th[3]')
        link = row.find_element_by_tag_name('a')

        list_of_thesis.append(dict(title=subject.text, location=location.text, link=link.get_attribute('href'), company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #29
0
def crawl(test=False):
    """Scrape the Bombardier job-search results for thesis postings in Sweden.

    Keeps rows whose text mentions "thesis" and whose location cell
    (./td[2]) contains 'SE'.

    Returns a list of dicts (title, location, link, company); [] when
    nothing matches.

    Args:
        test: when True and no postings were found, the browser is left
              open so a test harness can inspect the page.
    """
    browser = browserobject.start_browser(
        "https://jobs.bombardier.com/key/final-thesis-bombardier-jobs.html",
        test)

    COMPANY = "Bombardier"

    table = browser.find_element_by_id('searchresults')
    table_rows = table.find_elements_by_tag_name('tr')[2:]  # skip the two header rows
    list_of_thesis = []

    for row in table_rows:
        if 'thesis' in row.text.lower():
            location = row.find_element_by_xpath('./td[2]')

            if 'SE' not in location.text:
                continue  # only Swedish postings are wanted

            title = row.find_element_by_tag_name('a')
            # Reuse the href already fetched instead of a second redundant
            # get_attribute() round-trip (the original computed `link` and
            # then ignored it); the unused './td[3]' (date) lookup is gone.
            link = title.get_attribute('href')

            list_of_thesis.append(
                dict(title=title.text,
                     location=location.text,
                     link=link,
                     company=COMPANY))

    if list_of_thesis:
        browser.quit()
        return list_of_thesis
    else:
        if not test: browser.quit()
        return []
Exemple #30
0
# -*- coding: utf-8 -*-
"""Stand-alone crawler script: Volvo Cars job-search thesis listings."""
import browserobject
import sys
import os
import time

sys.path.insert(1, os.path.join(sys.path[0], '..'))

from json_append import *
from readability.readability import Document

browser = browserobject.start_browser(
    "http://www.volvocars.com/intl/about/our-company/careers/job-search")
job_list = browser.find_elements_by_xpath(
    """//*[@id="volvo"]/div[3]/div/div""")

list_of_thesis = []
# Skip row 0 (column labels) and the last row ("we are sorry...").
for job in job_list[1:-1]:
    position = job.find_element_by_tag_name('dt')

    # Test if thesis work
    if 'thesis' in position.text.lower():
        location, app_date = job.find_elements_by_tag_name('dd')
        link_description = job.find_element_by_css_selector(
            'a').get_attribute('href')

        print(' Position: {} \n Location: {} \n Last Application Date: {}'.
              format(position.text, location.text, app_date.text))
        # BUGFIX: list_of_thesis was created but never filled; the
        # companion version of this script below shows the intended
        # append, restored here.
        list_of_thesis.append(dict(title=position.text,
                                   location=location.text,
                                   link=link_description))
Exemple #31
0
# -*- coding: utf-8 -*-
"""Stand-alone crawler script: collect Volvo Cars thesis listings."""
import browserobject
import sys
import os
import time

sys.path.insert(1, os.path.join(sys.path[0], '..'))

from json_append import *
from readability.readability import Document


browser = browserobject.start_browser("http://www.volvocars.com/intl/about/our-company/careers/job-search")
job_list = browser.find_elements_by_xpath("""//*[@id="volvo"]/div[3]/div/div""")

list_of_thesis = []
# First row holds the column labels and the last row is the
# "we are sorry..." placeholder, so both are sliced away.
for job in job_list[1:-1]:
    position = job.find_element_by_tag_name('dt')

    # Keep only thesis postings.
    if 'thesis' not in position.text.lower():
        continue

    location, app_date = job.find_elements_by_tag_name('dd')
    link_description = job.find_element_by_css_selector('a').get_attribute('href')

    print(' Position: {} \n Location: {} \n Last Application Date: {}'.format(position.text, location.text, app_date.text))

    list_of_thesis.append(dict(title=position.text, location=location.text, link=link_description))
"""Stand-alone crawler script: ABB open-positions table (thesis filter)."""
import browserobject
import time  # BUGFIX: time.sleep() below raised NameError -- time was never imported

browser = browserobject.start_browser("http://new.abb.com/se/jobba-hos-oss/lediga-tjanster")

# Element has to be in view to click -> scroll to it first.
find_button = browser.find_element_by_class_name("""findButton""")
filter_selection = browser.find_element_by_xpath("""//*[@id="Content_C001_Col00"]/div/div[2]/div[3]/span[4]/div/a/span""")
browser.execute_script("return arguments[0].scrollIntoView();", find_button)
filter_selection.click()
# Pick the fifth option in the (Chosen-style) dropdown.
browser.find_element_by_xpath("""//*[@data-option-array-index='4']""").click()
time.sleep(0.5)  # time for content to load
job_list = browser.find_elements_by_xpath("""//*[@id="jobOffers"]/tbody/tr""")

list_of_thesis = []

for row in job_list:
    title = row.find_element_by_tag_name('a')
    # Row cells: [?, location, department, job type, ?] -- only
    # location/department are used.
    _, location, department, _, _ = row.find_elements_by_tag_name('td')
    print('Title: {}\nLocation: {}\nDepartment: {}\nLink: {}\n\n'.format(title.text, location.text, department.text, title.get_attribute('href')))

    list_of_thesis.append(dict(title=title.text, location=location.text, link=title.get_attribute('href')))

if list_of_thesis:
    #json_append.update_json(list_of_thesis)
    pass