def grab_data_from_local_files():
    """Preview the local data sources used by this script.

    Prints the head of the two CSV inputs and a 10-row sample from the
    ``positions`` and ``visa_records`` tables. Relies on module-level
    ``pd`` (pandas) and the DB cursor ``cur``; purely diagnostic, returns
    nothing.
    """
    # CSV previews.
    us_companies = pd.read_csv('../data/us_companies.csv')
    print(us_companies.head())

    msft = pd.read_csv('../data/companies/microsoft.csv')
    print(msft.head())

    # Database previews.
    cur.execute("select * from positions limit 10")
    print(cur.fetchall())

    cur.execute("select * from visa_records limit 10")
    print(cur.fetchall())
def save2mysql(data=None, if_cache=True):
    """Insert salary rows into the ``salaries`` table.

    Parameters
    ----------
    data : iterable of 4-sequences, optional
        Rows of (title, company, base_pay, positions). Ignored when
        ``if_cache`` is true. Defaults to an empty list.
    if_cache : bool
        When true (default), load the rows from the local pickle cache
        file ``'sss'`` instead of using ``data``.

    Notes
    -----
    Uses the module-level cursor ``cur`` and connection ``conn``.
    Commits once after all inserts; rows that fail are logged and skipped.
    """
    # Fix: the original used a mutable default argument (data=[]),
    # which is shared across calls.
    if data is None:
        data = []
    if if_cache:
        # Fix: original leaked the file handle via pickle.load(open(...)).
        # NOTE(review): unpickling is only safe on trusted local cache data;
        # never point this at external input.
        with open('sss', 'rb') as fh:
            data = pickle.load(fh)
    # Fix: parameterized query instead of string-built SQL — avoids
    # injection and the quoting bugs of interpolated values.
    sql = ("INSERT INTO salaries (title, company, base_pay, positions) "
           "VALUES (%s, %s, %s, %s)")
    for row in data:
        try:
            cur.execute(sql, (row[0], row[1], row[2], row[3]))
        except Exception as e:
            print(e)
            print(sql)
            continue
    conn.commit()
def clean_save_job(data):
    """Normalize job postings from two APIs and insert them into ``positions``.

    Parameters
    ----------
    data : dict
        Must contain ``'github_jobs'`` (items with keys company, title,
        location, description, created_at, company_url) and
        ``'adzuna_jobs'`` (items with nested display_name fields,
        created, redirect_url).

    Notes
    -----
    Strips HTML tags and double quotes from descriptions, then inserts
    via the module-level cursor ``cur``; commits once via ``conn``.
    Rows that fail to insert are logged and skipped.
    """
    jobs = []
    # Normalize both feeds into a common record shape:
    # company, title, location, description, created, url
    for item in data['github_jobs']:
        jobs.append({
            'company': item['company'],
            'title': item['title'],
            'location': item['location'],
            'description': item['description'],
            'created': item['created_at'],
            'url': item['company_url'],
        })
    for item in data['adzuna_jobs']:
        jobs.append({
            'company': item['company']['display_name'],
            'title': item['title'],
            'location': item['location']['display_name'],
            'description': item['description'],
            'created': item['created'],
            'url': item['redirect_url'],
        })

    # Hoisted out of the loop: the original recompiled this pattern
    # for every job.
    tag_re = re.compile('<[^>]*>')
    # Fix: parameterized query instead of string-built SQL with a
    # triple-quote escaping hack — avoids injection and quoting bugs.
    sql = ("INSERT INTO positions (company, title, location, created, url, "
           "description) VALUES (%s, %s, %s, %s, %s, %s)")
    for item in jobs:
        # Clean the description: drop HTML tags and double quotes
        # (quote removal kept for data compatibility with prior runs).
        content = tag_re.sub('', item['description'])
        content = re.sub('\"', '', content).strip()
        item['description'] = content
        try:
            cur.execute(sql, (item['company'], item['title'],
                              item['location'], item['created'],
                              item['url'], item['description']))
        except Exception as e:
            print(e)
            continue
    conn.commit()
def clean_save_visa(data):
    """Flatten yearly visa-sponsor records and insert into ``visa_records``.

    Parameters
    ----------
    data : list of list of dict
        Outer list per scrape batch/year; each inner dict has keys
        ``rank``, ``sponsor``, ``num_of_LCA`` (string, possibly with
        thousands separators), ``ave_salary``, ``year``.

    Notes
    -----
    Uses the module-level cursor ``cur`` and connection ``conn``.
    Commits once after all inserts; rows that fail are logged and skipped.
    """
    # Fix: parameterized query instead of string-built SQL — avoids
    # injection and quoting bugs.
    sql = ("INSERT INTO visa_records (rank, sponsor, num_of_LCA, "
           "ave_salary, ryear) VALUES (%s, %s, %s, %s, %s)")
    # Fix: iterate directly instead of `for i in range(len(data))`.
    for line in data:
        for item in line:
            try:
                cur.execute(sql, (item['rank'], item['sponsor'],
                                  int(item['num_of_LCA'].replace(',', '')),
                                  item['ave_salary'], item['year']))
                # Fix: the original re-executed cur.execute(sql) after the
                # try/except, inserting every successful row a second time.
            except Exception as e:
                # Fix: the original swallowed errors silently; log them.
                print(e)
                continue
    conn.commit()
def clean_save_company():
    """Load ``../data/us_companies.csv`` and insert rows into ``companies``.

    Cleans the description (strips double quotes) and builds a
    "city,state" location, falling back to state only when the city cell
    is missing (pandas reads empty cells as NaN floats).

    Notes
    -----
    Uses the module-level cursor ``cur`` and connection ``conn``.
    Commits once after all inserts; rows that fail are logged and skipped.
    """
    df = pd.read_csv('../data/us_companies.csv')
    # Fix: parameterized query instead of string-built SQL with a
    # triple-quote escaping hack — avoids injection and quoting bugs.
    # Template hoisted out of the loop.
    sql = ("INSERT INTO companies (id, name, location, year_founded, "
           "category, description) VALUES (%s, %s, %s, %s, %s, %s)")
    for _, row in df.iterrows():
        # Quote removal kept for data compatibility with prior runs.
        description = re.sub('\"', '', row['description']).strip()
        # Fix: isinstance instead of `type(...) != str`.
        if isinstance(row['city'], str):
            location = row['city'] + "," + row['state']
        else:
            # Missing city comes through as NaN (a float).
            location = row['state']
        try:
            cur.execute(sql, (row['company_name_id'], row['company_name'],
                              location, row['year_founded'],
                              row['company_category'], description))
        except Exception as e:
            print(e)
            print(sql)
            continue
    conn.commit()