def get_jobs():
    company_infos = get_super_brand_companies()
    for company in company_infos:
        job_infos = extract_jobs(company["name"], company["link"])
        print("total jobs:", len(job_infos))
        save_to_file(company["name"], job_infos)
    return 1

def getJobList(soup, companyName, isNewLine):
    jobsContainer = soup.find("div", {"id": "NormalInfo"})
    jobs = jobsContainer.find("tbody").find_all("tr")
    jobList = []
    for job in jobs:
        job_details = job.find_all("td")
        if len(job_details) == 5:  # only keep rows that have all five <td> cells
            jobList.append(getJobDetail(job_details))
    save_to_file(jobList, companyName, isNewLine)

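# Note: the snippets in this collection all assume a save_to_file helper,
# with varying signatures. A minimal sketch matching the first snippet's
# call, save_to_file(company_name, job_infos), assuming job_infos is a list
# of dicts; this is an illustration, not the actual implementation.
import csv

def save_to_file(company_name, job_infos):
    # One CSV per company; column order comes from the first job dict.
    with open(f"{company_name}.csv", "w", newline="", encoding="utf-8") as file:
        if not job_infos:
            return
        writer = csv.DictWriter(file, fieldnames=list(job_infos[0].keys()))
        writer.writeheader()
        writer.writerows(job_infos)
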
def export():
    try:
        term = request.args.get("term")
        if not term:
            raise Exception()
        term = term.lower()  # lower() only after the None check, or a missing ?term= crashes
        jobs = db.get(term)
        if not jobs:
            raise Exception()
        save_to_file(term, jobs)
        return send_file(f"downloaded_file/remote_{term}_jobs.csv")
    except Exception:
        return redirect("/")

def export():
    try:
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        return send_file("results.csv")
    except Exception:
        return redirect("/")

def export():
    # try:
    word = request.args.get('word')  # TODO: fix the bug when the word is "c++"
    # if not word:
    #     raise Exception()
    word = word.lower()
    jobs = db.get(word)
    # if not jobs:
    #     raise Exception()
    # save_to_file(f'{word}.csv', jobs)
    return send_file(f'{word}.csv', as_attachment=True)

def export():
    try:
        job = request.args.get("job")
        if not job:
            raise Exception()
        job = job.lower()
        jobs = db.get(job)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        return send_file("jobs.csv")
    except Exception:
        return redirect("./")

def main():
    query = input('What do you want to query? ex: python\n')
    location = input('Where do you work at? ex: london\n')
    print(f'scraping {query} jobs in {location}')
    websites = [url_indeed, url_stackoverflow]
    for website in websites:
        website['query'] = query
        website['location'] = location
    indeed_jobs = get_indeed_jobs(url_indeed)
    stackoverflow_jobs = get_stackoverflow_jobs(url_stackoverflow)
    jobs = indeed_jobs + stackoverflow_jobs
    save_to_file(jobs)

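# main() above mutates url_indeed and url_stackoverflow in place, so they
# are presumably dicts carrying a base URL plus query/location slots. A
# hypothetical shape (the 'base' key, the URL, and the build_url helper are
# assumptions, not from the source):
url_indeed = {
    'base': "https://www.indeed.com/jobs",
    'query': "",     # filled in by main()
    'location': "",  # filled in by main()
}

def build_url(website):
    # e.g. https://www.indeed.com/jobs?q=python&l=london
    return f"{website['base']}?q={website['query']}&l={website['location']}"
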
def export():
    try:
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        return send_file('jobs.csv', as_attachment=True)
    except Exception:
        return redirect("/")

def export():
    try:
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()  # lower() after the None check, or a missing ?word= crashes
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        # The file is downloaded under the name given in attachment_filename.
        return send_file("jobs.csv", mimetype='application/x-csv',
                         attachment_filename='summary_report.csv', as_attachment=True)
    except Exception:
        return redirect("/")

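# Note: send_file's attachment_filename parameter was renamed to
# download_name in Flask 2.0, and the old name has since been removed;
# on current Flask the call above becomes:
#
#     return send_file("jobs.csv", mimetype='application/x-csv',
#                      as_attachment=True, download_name='summary_report.csv')
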
def export():
    try:
        word = request.args.get("word")
        if not word:
            raise Exception()
        word = word.lower()
        dbWord = db.get(word)
        if not dbWord:
            raise Exception()
        save_to_file(dbWord)  # the CSV file is saved in the Web-scraping folder
        return send_file("storage.csv")  # downloaded by whoever clicked export
    except Exception:
        return redirect("/")

def export():
    try:
        word = request.args.get('term')
        if not word:
            raise Exception()
        word = word.lower()
        jobs = jobDB.get(word)
        if not jobs:
            raise Exception()
        save.save_to_file(jobs, word)
        return send_file(f'{word}.csv', as_attachment=True,
                         attachment_filename=f'{word}.csv')
    except Exception:
        return redirect('/')

def export():
    # If anything in the try block fails, control jumps to except.
    try:
        keyword = request.args.get("keyword")
        if not keyword:
            # Inside the try block, raising an exception jumps to except,
            # which returns the redirect.
            raise Exception()
        keyword = keyword.lower()
        jobs = db.get(keyword)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        return send_file("jobs.csv")
    except Exception:
        return redirect("/")

def export():
    try:
        word = request.args.get("word")
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save_to_file(jobs, word)
        return send_file(f"{word}.csv", mimetype='text/csv',
                         attachment_filename=f'{word}.csv', as_attachment=True)
    except Exception:
        return redirect("/")

def export():
    try:
        arg = request.args.get('term')
        if not arg:
            raise Exception()
        arg = arg.lower()
        jobs = db.get(arg)
        if not jobs:
            raise Exception()
        save_to_file(arg, jobs)
        return send_file(f"{arg}.csv", mimetype='text/csv',
                         as_attachment=True, attachment_filename=f"{arg}.csv")
    except Exception:
        return redirect("/")

def read():
    parameter = request.args['parameter'].lower()
    # parameter = 'react'
    urls = {  # renamed from "list", which shadowed the built-in
        'stackoverflow': f"https://stackoverflow.com/jobs?q={parameter}&r=true",
        'weworkremote': f"https://weworkremotely.com/remote-jobs/search?parameter={parameter}",
        'remoteok': f"https://remoteok.io/remote-dev+{parameter}-jobs"
    }
    # Cache results so repeated searches skip the network round trip.
    if parameter not in past_result_list:
        result = fetch_data(urls)
        past_result_list[parameter] = result
    else:
        result = past_result_list[parameter]
    save_to_file(result)
    return {"result": result}

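# fetch_data is not shown anywhere in this collection. A hypothetical
# skeleton, assuming it takes the URL dict above and returns the raw page
# per site (the requests usage and the User-Agent header are assumptions):
import requests

def fetch_data(urls):
    result = {}
    for site, url in urls.items():
        # A real implementation would parse job listings out of the HTML;
        # this sketch just keeps the raw text per site.
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
        result[site] = response.text
    return result
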
def export():
    try:
        print("Downloading...")
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()
        print("word:", word)
        jobs = db.get(word)
        print("jobs:", jobs)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        print("Saved the file...")
        return send_file("jobs.csv")
    except Exception:
        # Reached on any error above.
        print("There was an error downloading")
        return redirect("/")

def oneFile(url, num):
    def makeTextList(value):
        spl = value.text.strip().split('\n')
        spl[1] = "(" + spl[1] + ")"
        return "".join(spl)

    pageText = getPage(url)
    soup = BeautifulSoup(pageText, "html.parser")
    sec = soup.find("section", {"class": "bg-lightgray section-sm"}).find("p").find("a").text
    participantList = list(map(makeTextList, soup.find("table", {"id": "datatable-PART_P_OC"}).find("tbody").find_all("tr")))
    observerList = list(map(makeTextList, soup.find("table", {"id": "datatable-PART_O_OC"}).find("tbody").find_all("tr")))
    pLen = len(participantList)
    oLen = len(observerList)
    rows = []
    # Pad the shorter list so participants and observers line up row by row.
    for i in range(max(oLen, pLen)):
        if i >= pLen:
            participantList.append("")
        if i >= oLen:
            observerList.append("")
        rows.append([sec, participantList[i], observerList[i]])
    save_to_file(rows, num)

# %% import functions
from youtube import extract_a_channel_using_playlists as get_video, fill_blanks, summarize_data
from save import save_to_file

# %% scrape data
URL = "https://www.youtube.com/user/GrabTheGT/playlists"  # grab_the_guitar
data = get_video(URL)

# %%
summarize_data(data)

# %% fill in the blanks and check the status of the data
print(fill_blanks(data))

# %% save file
file_name = 'youtube_Omar'
csv_format = ['Title', '', '', '', '', 'Link', 'Date', 'View']
save_to_file(data, file_name, csv_format)

from filmmakers import get_profiles as get_filmmakers_profile
from megaphone import get_profiles as get_megaphone_profile
from save import save_to_file

# filmmakers_profiles = get_filmmakers_profile()
megaphone_profiles = get_megaphone_profile()
profiles = megaphone_profiles
save_to_file(profiles)

def export():
    date = extract_date()
    datas = result_db[0]
    save_to_file(datas, date)
    return send_file(f"COVID-19-{date}.csv")

from flask import Flask, render_template, send_file
from covid import extract_data, extract_date
from save import save_to_file

date = extract_date()
datas = extract_data()
save_to_file(datas, date)

app = Flask("COVID-19-global")
result_db = {}

@app.route("/")
def home():
    return render_template("home.html")

@app.route("/result")
def result():
    datas = extract_data()
    result_db[0] = datas
    return render_template("result.html", datas=datas)

@app.route("/export")
def export():
    date = extract_date()
    datas = result_db[0]  # assumes /result was visited first
    save_to_file(datas, date)
    return send_file(f"COVID-19-{date}.csv")

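# Since result_db[0] only exists after /result has been visited, hitting
# /export first raises a KeyError. A guarded variant of the route (the
# redirect import is an addition, not part of the original):
from flask import redirect

@app.route("/export")
def export():
    date = extract_date()
    datas = result_db.get(0)  # None until /result has populated the cache
    if datas is None:
        return redirect("/result")
    save_to_file(datas, date)
    return send_file(f"COVID-19-{date}.csv")
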
from link import export_all
from save import save_to_file

datas = export_all()
save_to_file(datas)

    like_div = soup.find("div", {"class": "_1g06"})
    if like_div:
        like = like_div.text
    else:
        like = 0
    # comments = soup.find_all("div", {"class": "_2a_i"})
    # commentNum = len(comments)
    # for comment in comments:
    #     if comment.find("span", {"class": "_4ayk"}):
    #         num = comment.find("span", {"class": "_4ayk"}).text.split()[1]
    #         commentNum += int(num)
    post_dic = {"name": name, "timeline": timeline, "post": post, "URL": page_url, "like": like}
    pages.append(post_dic)
    # add comments to the existing dictionary
    # print("comments:", len(comments))  # comments is commented out above, so this would raise NameError
    print("pages:", len(pages))
    # for i in range(len(comments)):
    #     post_dic = comments[i]
    #     pages[i].update(post_dic)
    return pages


df = pd.read_excel('./URLS/urls.xlsx')
for i in range(len(df)):
    result = extract_pages(df["URL"][i])
    save_to_file(result, df["NAME"][i])

import os
import requests
from bs4 import BeautifulSoup
from extract_data import extract_jobs, extract_jobObj
from save import save_to_file

os.system("clear")

alba_url = "http://www.alba.co.kr"
results = requests.get(alba_url)
soup = BeautifulSoup(results.text, "html.parser")
company_box = soup.find("div", {"id": "MainSuperBrand"}).find_all("a", {"class": "goodsBox-info"})

for company in company_box:
    company_name = company.find("span", {"class": "company"}).text
    job_list = []
    jobs = extract_jobs(company)
    for job in jobs:
        jobObj = extract_jobObj(job)
        job_list.append(jobObj)
    save_to_file(company_name, job_list)

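# The extract_data module is not included in this collection. A hypothetical
# sketch of the two helpers it exports; the href lookup, the selectors, and
# the cell meanings are assumptions (the tbody/td layout loosely matches the
# getJobList snippet near the top, which scrapes the same site):
import requests
from bs4 import BeautifulSoup

def extract_jobs(company):
    # company is an <a class="goodsBox-info"> tag; follow its link and
    # collect the job rows from the company's listing page.
    page = requests.get(company["href"])
    soup = BeautifulSoup(page.text, "html.parser")
    return soup.find("div", {"id": "NormalInfo"}).find("tbody").find_all("tr")

def extract_jobObj(job):
    # Turn one table row into a dict; the cell meanings are guesses.
    cells = [td.text.strip() for td in job.find_all("td")]
    return {"time": cells[0], "title": cells[1], "pay": cells[2]}
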
from indeed import get_jobs as get_indeed_jobs
from so import get_jobs as get_so_jobs
from save import save_to_file

so_jobs = get_so_jobs()
indeed_jobs = get_indeed_jobs()
jobs = so_jobs + indeed_jobs
save_to_file(jobs)

from coex import get_events as get_coex_events
from kintex import get_events as get_kintex_events
from save import save_to_file

kintex_events = get_kintex_events()
coex_events = get_coex_events()
events = kintex_events + coex_events
save_to_file(events)
print("Finished scraping and saving the CSV file!")

import indeed
from save import save_to_file

max_indeed_page = indeed.extract_indeed_pages()
indeed_jobs = indeed.extract_indeed_jobs(max_indeed_page)
save_to_file(indeed_jobs)

from indeed import get_jobs as get_indeed_jobs
from stackoverflow import get_jobs as get_so_jobs
from save import save_to_file

key = input("Input Keyword : ")  # input() already returns a str
indeed_jobs = get_indeed_jobs(key)
so_jobs = get_so_jobs(key)
jobs = indeed_jobs + so_jobs
save_to_file(jobs, key)

from indeed import get_jobs as get_indeed_jobs
from stackoverflow import get_jobs as get_so_jobs
from save import save_to_file

so_jobs = get_so_jobs()
indeed_jobs = get_indeed_jobs()
jobs = so_jobs + indeed_jobs
save_to_file(jobs)

from company import call_company
from Scrapping import list_company
from save import save_to_file

words = call_company()
company_information = list_company(words)
save_to_file(company_information)